From 2024f91e965f7d7a38364874031c2132dcd11992 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 19 Aug 2020 13:47:15 +0200 Subject: ns: Add a common refcount into ns_common Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner [christian.brauner@ubuntu.com: rewrite commit & split into two patches] Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 5fbc4000358f..0f1d024bd958 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -2,12 +2,15 @@ #ifndef _LINUX_NS_COMMON_H #define _LINUX_NS_COMMON_H +#include + struct proc_ns_operations; struct ns_common { atomic_long_t stashed; const struct proc_ns_operations *ops; unsigned int inum; + refcount_t count; }; #endif -- cgit v1.2.3 From 9a56493f6942c0e2df1579986128721da96e00d8 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:21 +0300 Subject: uts: Use generic ns_common::count Switch over uts namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644978167.604812.1773586504374412107.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/utsname.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 44429d9142ca..2b1737c9b244 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -4,7 +4,6 @@ #include -#include #include #include #include @@ -22,7 +21,6 @@ struct user_namespace; extern struct user_namespace init_user_ns; struct uts_namespace { - struct kref kref; struct new_utsname name; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -33,16 +31,17 @@ extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS static inline void get_uts_ns(struct uts_namespace *ns) { - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); } extern struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); -extern void free_uts_ns(struct kref *kref); +extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { - kref_put(&ns->kref, free_uts_ns); + if (refcount_dec_and_test(&ns->ns.count)) + free_uts_ns(ns); } void uts_ns_init(void); -- cgit v1.2.3 From 137ec390fad41928307216ea9f91acf5cf6f4204 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:27 +0300 Subject: ipc: Use generic ns_common::count Switch over ipc namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644978697.604812.16592754423881032385.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/ipc_namespace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a06a78c67f19..05e22770af51 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -27,7 +27,6 @@ struct ipc_ids { }; struct ipc_namespace { - refcount_t count; struct ipc_ids ids[3]; int sem_ctls[4]; @@ -128,7 +127,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - refcount_inc(&ns->count); + refcount_inc(&ns->ns.count); return ns; } -- cgit v1.2.3 From 8eb71d95f34a009cc22084e05e78eb9686f7ea28 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:32 +0300 Subject: pid: Use generic ns_common::count Switch over pid namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644979226.604812.7512601754841882036.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/pid_namespace.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 5a5cb45ac57e..7c7e627503d2 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ struct fs_pin; struct pid_namespace { - struct kref kref; struct idr idr; struct rcu_head rcu; unsigned int pid_allocated; @@ -43,7 +41,7 @@ extern struct pid_namespace init_pid_ns; static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); return ns; } -- cgit v1.2.3 From 265cbd62e034cb09a9da7cbff9072c8082f8df65 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:37 +0300 Subject: user: Use generic ns_common::count Switch over user namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644979754.604812.601625186726406922.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/user_namespace.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6ef1c7109fc4..64cf8ebdc4ec 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -57,7 +57,6 @@ struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; - atomic_t count; struct user_namespace *parent; int level; kuid_t owner; @@ -109,7 +108,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type); static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->ns.count); return ns; } @@ -119,7 +118,7 @@ extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { - if (ns && atomic_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->ns.count)) __put_user_ns(ns); } -- cgit v1.2.3 From f387882d8d3eda7c7b13e330c73907035569ce4a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:50 +0300 Subject: cgroup: Use generic ns_common::count Switch over cgroup namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644980994.604812.383801057081594972.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 618838c48313..451c2d26a5db 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -854,7 +854,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { - refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -889,12 +888,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) - refcount_inc(&ns->count); + refcount_inc(&ns->ns.count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (ns && refcount_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->ns.count)) free_cgroup_ns(ns); } -- cgit v1.2.3 From 28c41efd08bf97fc64f75304035ee3943995b68e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:17:00 +0300 Subject: time: Use generic ns_common::count Switch over time namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644982033.604812.9406853013011123238.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 5b6031385db0..a51ffc089219 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -4,7 +4,6 @@ #include -#include #include #include #include @@ -18,7 +17,6 @@ struct timens_offsets { }; struct time_namespace { - struct kref kref; struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; @@ -37,20 +35,21 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); return ns; } struct time_namespace *copy_time_ns(unsigned long flags, struct user_namespace *user_ns, struct time_namespace *old_ns); -void free_time_ns(struct kref *kref); +void free_time_ns(struct time_namespace *ns); int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); static inline void put_time_ns(struct time_namespace *ns) { - kref_put(&ns->kref, free_time_ns); + if (refcount_dec_and_test(&ns->ns.count)) + free_time_ns(ns); } void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m); -- cgit v1.2.3 From 01fd30da0474d5528d96a154cb50ce3b0352fd19 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:55:58 +0200 Subject: dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new type struct dma_buf_map represents a mapping of dma-buf memory into kernel space. It contains a flag, is_iomem, that signals users to access the mapped memory with I/O operations instead of regular loads and stores. It was assumed that DMA buffer memory can be accessed with regular load and store operations. Some architectures, such as sparc64, require the use of I/O operations to access dma-map buffers that are located in I/O memory. Providing struct dma_buf_map allows drivers to implement this. This was specifically a problem when refreshing the graphics framebuffer on such systems. [1] As the first step, struct dma_buf stores an instance of struct dma_buf_map internally. Afterwards, dma-buf's vmap and vunmap interfaces are be converted. Finally, affected drivers can be fixed. v3: * moved documentation into separate patch * test for NULL pointers with ! [1] https://lore.kernel.org/dri-devel/20200725191012.GA434957@ravnborg.org/ Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Sumit Semwal Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-2-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 82 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-buf.h | 3 +- 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 include/linux/dma-buf-map.h (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h new file mode 100644 index 000000000000..00143c88feb6 --- /dev/null +++ b/include/linux/dma-buf-map.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer to dma-buf-mapped memory, plus helpers. + */ + +#ifndef __DMA_BUF_MAP_H__ +#define __DMA_BUF_MAP_H__ + +#include + +/** + * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the dma-buf memory is located in I/O + * memory, or false otherwise. + */ +struct dma_buf_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/* API transition helper */ +static inline bool dma_buf_map_is_vaddr(const struct dma_buf_map *map, const void *vaddr) +{ + return !map->is_iomem && (map->vaddr == vaddr); +} + +/** + * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL + * @map: The dma-buf mapping structure + * + * Depending on the state of struct dma_buf_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool dma_buf_map_is_null(const struct dma_buf_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * dma_buf_map_is_set - Tests is the dma-buf mapping has been set + * @map: The dma-buf mapping structure + * + * Depending on the state of struct dma_buf_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping is been set, or false otherwise. + */ +static inline bool dma_buf_map_is_set(const struct dma_buf_map *map) +{ + return !dma_buf_map_is_null(map); +} + +/** + * dma_buf_map_clear - Clears a dma-buf mapping structure + * @map: The dma-buf mapping structure + * + * Clears all fields to zero; including struct dma_buf_map.is_iomem. So + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void dma_buf_map_clear(struct dma_buf_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +#endif /* __DMA_BUF_MAP_H__ */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 957b398d30e5..fcc2ddfb6d18 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -13,6 +13,7 @@ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ +#include #include #include #include @@ -309,7 +310,7 @@ struct dma_buf { const struct dma_buf_ops *ops; struct mutex lock; unsigned vmapping_counter; - void *vmap_ptr; + struct dma_buf_map vmap_ptr; const char *exp_name; const char *name; spinlock_t name_lock; -- cgit v1.2.3 From 6619ccf1bb1d0ebb071f758111efa83918b216fc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:55:59 +0200 Subject: dma-buf: Use struct dma_buf_map in dma_buf_vmap() interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates dma_buf_vmap() and dma-buf's vmap callback to use struct dma_buf_map. The interfaces used to return a buffer address. This address now gets stored in an instance of the structure that is given as an additional argument. The functions return an errno code on errors. Users of the functions are updated accordingly. This is only an interface change. It is currently expected that dma-buf memory can be accessed with system memory load/store operations. v3: * update fastrpc driver (kernel test robot) v2: * always clear map parameter in dma_buf_vmap() (Daniel) * include dma-buf-heaps and i915 selftests (kernel test robot) Signed-off-by: Thomas Zimmermann Acked-by: Sumit Semwal Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Tomasz Figa Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-3-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 13 +++++++++++++ include/linux/dma-buf.h | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 00143c88feb6..5ae732d373eb 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -23,6 +23,19 @@ struct dma_buf_map { bool is_iomem; }; +/** + * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory + * @map: The dma-buf mapping structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag. + */ +static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + /* API transition helper */ static inline bool dma_buf_map_is_vaddr(const struct dma_buf_map *map, const void *vaddr) { diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index fcc2ddfb6d18..7237997cfa38 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -266,7 +266,7 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); - void *(*vmap)(struct dma_buf *); + int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); void (*vunmap)(struct dma_buf *, void *vaddr); }; @@ -503,6 +503,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); -void *dma_buf_vmap(struct dma_buf *); -void dma_buf_vunmap(struct dma_buf *, void *vaddr); +int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); +void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr); #endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From 20e76f1a70596590dec32a5d1f598fba04859526 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:56:00 +0200 Subject: dma-buf: Use struct dma_buf_map in dma_buf_vunmap() interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates dma_buf_vunmap() and dma-buf's vunmap callback to use struct dma_buf_map. The interfaces used to receive a buffer address. This address is now given in an instance of the structure. Users of the functions are updated accordingly. This is only an interface change. It is currently expected that dma-buf memory can be accessed with system memory load/store operations. v2: * include dma-buf-heaps and i915 selftests (kernel test robot) * initialize cma_obj before using it in drm_gem_cma_free_object() (kernel test robot) Signed-off-by: Thomas Zimmermann Acked-by: Sumit Semwal Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Tomasz Figa Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-4-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 32 +++++++++++++++++++++++++++++--- include/linux/dma-buf.h | 4 ++-- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 5ae732d373eb..c173a4abf4ba 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -23,6 +23,16 @@ struct dma_buf_map { bool is_iomem; }; +/** + * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory + * @vaddr: A system-memory address + */ +#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + /** * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory * @map: The dma-buf mapping structure @@ -36,10 +46,26 @@ static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) map->is_iomem = false; } -/* API transition helper */ -static inline bool dma_buf_map_is_vaddr(const struct dma_buf_map *map, const void *vaddr) +/** + * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality + * @lhs: The dma-buf mapping structure + * @rhs: A dma-buf mapping structure to compare with + * + * Two dma-buf mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True is both structures are equal, or false otherwise. + */ +static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs, + const struct dma_buf_map *rhs) { - return !map->is_iomem && (map->vaddr == vaddr); + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; } /** diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 7237997cfa38..cf77cc15f4ba 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -267,7 +267,7 @@ struct dma_buf_ops { int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); - void (*vunmap)(struct dma_buf *, void *vaddr); + void (*vunmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); }; /** @@ -504,5 +504,5 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); -void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr); +void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); #endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From ccc22d41bd9acec58cdc7c3b012e1887aba40af4 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:56:01 +0200 Subject: dma-buf: Document struct dma_buf_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds struct dma_buf_map and its helpers to the documentation. A short tutorial is included. v3: * update documentation in a separate patch * expand docs (Daniel) * carry-over acks from patch 1 Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-5-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 72 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index c173a4abf4ba..fd1aba545fdf 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -8,6 +8,78 @@ #include +/** + * DOC: overview + * + * Calling dma-buf's vmap operation returns a pointer to the buffer's memory. + * Depending on the location of the buffer, users may have to access it with + * I/O operations or memory load/store operations. For example, copying to + * system memory could be done with memcpy(), copying to I/O memory would be + * done with memcpy_toio(). + * + * .. code-block:: c + * + * void *vaddr = ...; // pointer to system memory + * memcpy(vaddr, src, len); + * + * void *vaddr_iomem = ...; // pointer to I/O memory + * memcpy_toio(vaddr, _iomem, src, len); + * + * When using dma-buf's vmap operation, the returned pointer is encoded as + * :c:type:`struct dma_buf_map `. + * :c:type:`struct dma_buf_map ` stores the buffer's address in + * system or I/O memory and a flag that signals the required method of + * accessing the buffer. Use the returned instance and the helper functions + * to access the buffer's memory in the correct way. + * + * Open-coding access to :c:type:`struct dma_buf_map ` is + * considered bad style. Rather then accessing its fields directly, use one + * of the provided helper functions, or implement your own. For example, + * instances of :c:type:`struct dma_buf_map ` can be initialized + * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with + * dma_buf_map_set_vaddr(). These helpers will set an address in system memory. + * + * .. code-block:: c + * + * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf); + * + * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); + * + * Test if a mapping is valid with either dma_buf_map_is_set() or + * dma_buf_map_is_null(). + * + * .. code-block:: c + * + * if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map)) + * // always true + * + * Instances of :c:type:`struct dma_buf_map ` can be compared + * for equality with dma_buf_map_is_equal(). Mappings the point to different + * memory spaces, system or I/O, are never equal. That's even true if both + * spaces are located in the same address space, both mappings contain the + * same address value, or both mappings refer to NULL. + * + * .. code-block:: c + * + * struct dma_buf_map sys_map; // refers to system memory + * struct dma_buf_map io_map; // refers to I/O memory + * + * if (dma_buf_map_is_equal(&sys_map, &io_map)) + * // always false + * + * Instances of struct dma_buf_map do not have to be cleaned up, but + * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings + * always refer to system memory. + * + * The type :c:type:`struct dma_buf_map ` and its helpers are + * actually independent from the dma-buf infrastructure. When sharing buffers + * among devices, drivers have to know the location of the memory to access + * the buffers in a safe way. :c:type:`struct dma_buf_map ` + * solves this problem for dma-buf and its users. If other drivers or + * sub-systems require similar functionality, the type could be generalized + * and moved to a more prominent header file. + */ + /** * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. * @vaddr_iomem: The buffer's address if in I/O memory -- cgit v1.2.3 From ead1c9f376dbb2805796098ed6d2a70921c77ee5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 30 Sep 2020 16:50:48 +0300 Subject: iio: adc: at91_adc: remove platform data and move defs in driver file The AT91 ADC driver no longer uses the 'at91_add_device_adc' platform data type. This is no longer used (at least in mainline boards). This change removes the platform-data initialization from the driver, since it is mostly dead code now. Some definitions [from the platform data at91_adc.h include] have been moved in the driver, since they are needed in the driver. Signed-off-by: Alexandru Ardelean Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200930135048.11530-5-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/at91_adc.h | 49 ---------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 include/linux/platform_data/at91_adc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/at91_adc.h b/include/linux/platform_data/at91_adc.h deleted file mode 100644 index f20eaeb827ce..000000000000 --- a/include/linux/platform_data/at91_adc.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2011 Free Electrons - */ - -#ifndef _AT91_ADC_H_ -#define _AT91_ADC_H_ - -enum atmel_adc_ts_type { - ATMEL_ADC_TOUCHSCREEN_NONE = 0, - ATMEL_ADC_TOUCHSCREEN_4WIRE = 4, - ATMEL_ADC_TOUCHSCREEN_5WIRE = 5, -}; - -/** - * struct at91_adc_trigger - description of triggers - * @name: name of the trigger advertised to the user - * @value: value to set in the ADC's trigger setup register - to enable the trigger - * @is_external: Does the trigger rely on an external pin? - */ -struct at91_adc_trigger { - const char *name; - u8 value; - bool is_external; -}; - -/** - * struct at91_adc_data - platform data for ADC driver - * @channels_used: channels in use on the board as a bitmask - * @startup_time: startup time of the ADC in microseconds - * @trigger_list: Triggers available in the ADC - * @trigger_number: Number of triggers available in the ADC - * @use_external_triggers: does the board has external triggers availables - * @vref: Reference voltage for the ADC in millivolts - * @touchscreen_type: If a touchscreen is connected, its type (4 or 5 wires) - */ -struct at91_adc_data { - unsigned long channels_used; - u8 startup_time; - struct at91_adc_trigger *trigger_list; - u8 trigger_number; - bool use_external_triggers; - u16 vref; - enum atmel_adc_ts_type touchscreen_type; -}; - -extern void __init at91_add_device_adc(struct at91_adc_data *data); -#endif -- cgit v1.2.3 From 5483b8d5015bb366c372870cfe4448742082e41f Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 2 Oct 2020 11:27:23 +0300 Subject: iio: adc: ad7887: invert/rework external ref logic This change inverts/reworks the logic to use an external reference via a provided regulator. Now the driver tries to obtain a regulator. If one is found, then it is used. The rest of the driver logic already checks if there is a non-NULL reference to a regulator, so it should be fine. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201002082723.184810-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7887.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7887.h b/include/linux/platform_data/ad7887.h index 732af46b2d16..9b4dca6ae70b 100644 --- a/include/linux/platform_data/ad7887.h +++ b/include/linux/platform_data/ad7887.h @@ -13,13 +13,9 @@ * second input channel, and Vref is internally connected to Vdd. If set to * false the device is used in single channel mode and AIN1/Vref is used as * VREF input. - * @use_onchip_ref: Whether to use the onchip reference. If set to true the - * internal 2.5V reference is used. If set to false a external reference is - * used. */ struct ad7887_platform_data { bool en_dual; - bool use_onchip_ref; }; #endif /* IIO_ADC_AD7887_H_ */ -- cgit v1.2.3 From 28963f2f6b46d75bda8fed15bd5ce9923427a40d Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 1 Oct 2020 17:10:48 +0300 Subject: iio: adc: ad7298: rework external ref setup & remove platform data This change removes the old platform data for ad7298. It is only used to provide whether to use an external regulator as a reference. So, the logic is inverted a bit. The driver now tries to obtain a regulator. If one is provided, then the external ref is used. The rest of the logic should work as before. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201001141048.69050-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7298.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/platform_data/ad7298.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7298.h b/include/linux/platform_data/ad7298.h deleted file mode 100644 index 3e0ffe2d5d3d..000000000000 --- a/include/linux/platform_data/ad7298.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AD7298 SPI ADC driver - * - * Copyright 2011 Analog Devices Inc. - */ - -#ifndef __LINUX_PLATFORM_DATA_AD7298_H__ -#define __LINUX_PLATFORM_DATA_AD7298_H__ - -/** - * struct ad7298_platform_data - Platform data for the ad7298 ADC driver - * @ext_ref: Whether to use an external reference voltage. - **/ -struct ad7298_platform_data { - bool ext_ref; -}; - -#endif /* IIO_ADC_AD7298_H_ */ -- cgit v1.2.3 From 223f4d9517f8d97721647297b1cde41241a45233 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 1 Oct 2020 17:10:04 +0300 Subject: iio: dac: ad7303: remove platform data header The information in the ad7303 platform_data header is unused, so it's dead code. This change removes it and it's inclusion from the driver. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201001141004.53846-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7303.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/platform_data/ad7303.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7303.h b/include/linux/platform_data/ad7303.h deleted file mode 100644 index c2bd0a13bea1..000000000000 --- a/include/linux/platform_data/ad7303.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Analog Devices AD7303 DAC driver - * - * Copyright 2013 Analog Devices Inc. - */ - -#ifndef __IIO_ADC_AD7303_H__ -#define __IIO_ADC_AD7303_H__ - -/** - * struct ad7303_platform_data - AD7303 platform data - * @use_external_ref: If set to true use an external voltage reference connected - * to the REF pin, otherwise use the internal reference derived from Vdd. - */ -struct ad7303_platform_data { - bool use_external_ref; -}; - -#endif -- cgit v1.2.3 From 476c5818c37a7828d558f34ae01f0c32f8bfadde Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:13 +0900 Subject: llist: Add nonatomic __llist_add() and __llist_dell_all() We'll use these in the new, lockless kretprobes code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870619318.1229682.13027387548510028721.stgit@devnote2 --- include/linux/llist.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index 2e9c7215882b..24f207b0190b 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -197,6 +197,16 @@ static inline struct llist_node *llist_next(struct llist_node *node) extern bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head); + +static inline bool __llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head) +{ + new_last->next = head->first; + head->first = new_first; + return new_last->next == NULL; +} + /** * llist_add - add a new entry * @new: new entry to be added @@ -209,6 +219,11 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head) return llist_add_batch(new, new, head); } +static inline bool __llist_add(struct llist_node *new, struct llist_head *head) +{ + return __llist_add_batch(new, new, head); +} + /** * llist_del_all - delete all entries from lock-less list * @head: the head of lock-less list to delete all entries @@ -222,6 +237,14 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) return xchg(&head->first, NULL); } +static inline struct llist_node *__llist_del_all(struct llist_head *head) +{ + struct llist_node *first = head->first; + + head->first = NULL; + return first; +} + extern struct llist_node *llist_del_first(struct llist_head *head); struct llist_node *llist_reverse_order(struct llist_node *head); -- cgit v1.2.3 From d741bf41d7c7db4898bacfcb020353cddc032fd8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:24 +0900 Subject: kprobes: Remove kretprobe hash The kretprobe hash is mostly superfluous, replace it with a per-task variable. This gets rid of the task hash and it's related locking. Note that this may change the kprobes module-exported API for kretprobe handlers. If any out-of-tree kretprobe user uses ri->rp, use get_kretprobe(ri) instead. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870620431.1229682.16325792502413731312.stgit@devnote2 --- include/linux/kprobes.h | 19 +++++++++++++++++-- include/linux/sched.h | 4 ++++ 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5c8c271fa1e9..00cf4421efd5 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifdef CONFIG_KPROBES @@ -144,6 +145,11 @@ static inline int kprobe_ftrace(struct kprobe *p) * ignored, due to maxactive being too low. * */ +struct kretprobe_holder { + struct kretprobe *rp; + refcount_t ref; +}; + struct kretprobe { struct kprobe kp; kretprobe_handler_t handler; @@ -152,17 +158,18 @@ struct kretprobe { int nmissed; size_t data_size; struct hlist_head free_instances; + struct kretprobe_holder *rph; raw_spinlock_t lock; }; struct kretprobe_instance { union { + struct llist_node llist; struct hlist_node hlist; struct rcu_head rcu; }; - struct kretprobe *rp; + struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; - struct task_struct *task; void *fp; char data[]; }; @@ -221,6 +228,14 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, return ret; } +static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), + "Kretprobe is accessed from instance under preemptive context"); + + return READ_ONCE(ri->rph->rp); +} + #else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) diff --git a/include/linux/sched.h b/include/linux/sched.h index afe01e232935..5911805cafde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1315,6 +1315,10 @@ struct task_struct { struct callback_head mce_kill_me; #endif +#ifdef CONFIG_KRETPROBES + struct llist_head kretprobe_instances; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. -- cgit v1.2.3 From 29f006fdefe6f88abde973a0b0f20d2704e93fd4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:35 +0900 Subject: asm-generic/atomic: Add try_cmpxchg() fallbacks Only x86 provides try_cmpxchg() outside of the atomic_t interfaces, provide generic fallbacks to create this interface from the widely available cmpxchg() function. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Acked-by: Will Deacon Link: https://lore.kernel.org/r/159870621515.1229682.15506193091065001742.stgit@devnote2 --- include/linux/atomic-arch-fallback.h | 90 ++++++++++++++++++++++++++++++++---- include/linux/atomic-fallback.h | 90 ++++++++++++++++++++++++++++++++---- 2 files changed, 160 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h index bcb6aa27cfa6..a3dba31df01e 100644 --- a/include/linux/atomic-arch-fallback.h +++ b/include/linux/atomic-arch-fallback.h @@ -9,9 +9,9 @@ #include #ifndef arch_xchg_relaxed -#define arch_xchg_relaxed arch_xchg -#define arch_xchg_acquire arch_xchg -#define arch_xchg_release arch_xchg +#define arch_xchg_acquire arch_xchg +#define arch_xchg_release arch_xchg +#define arch_xchg_relaxed arch_xchg #else /* arch_xchg_relaxed */ #ifndef arch_xchg_acquire @@ -32,9 +32,9 @@ #endif /* arch_xchg_relaxed */ #ifndef arch_cmpxchg_relaxed -#define arch_cmpxchg_relaxed arch_cmpxchg -#define arch_cmpxchg_acquire arch_cmpxchg -#define arch_cmpxchg_release arch_cmpxchg +#define arch_cmpxchg_acquire arch_cmpxchg +#define arch_cmpxchg_release arch_cmpxchg +#define arch_cmpxchg_relaxed arch_cmpxchg #else /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg_acquire @@ -55,9 +55,9 @@ #endif /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg64_relaxed -#define arch_cmpxchg64_relaxed arch_cmpxchg64 -#define arch_cmpxchg64_acquire arch_cmpxchg64 -#define arch_cmpxchg64_release arch_cmpxchg64 +#define arch_cmpxchg64_acquire arch_cmpxchg64 +#define arch_cmpxchg64_release arch_cmpxchg64 +#define arch_cmpxchg64_relaxed arch_cmpxchg64 #else /* arch_cmpxchg64_relaxed */ #ifndef arch_cmpxchg64_acquire @@ -77,6 +77,76 @@ #endif /* arch_cmpxchg64_relaxed */ +#ifndef arch_try_cmpxchg_relaxed +#ifdef arch_try_cmpxchg +#define arch_try_cmpxchg_acquire arch_try_cmpxchg +#define arch_try_cmpxchg_release arch_try_cmpxchg +#define arch_try_cmpxchg_relaxed arch_try_cmpxchg +#endif /* arch_try_cmpxchg */ + +#ifndef arch_try_cmpxchg +#define arch_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg */ + +#ifndef arch_try_cmpxchg_acquire +#define arch_try_cmpxchg_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_acquire */ + +#ifndef arch_try_cmpxchg_release +#define arch_try_cmpxchg_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_release */ + +#ifndef arch_try_cmpxchg_relaxed +#define arch_try_cmpxchg_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_relaxed */ + +#else /* arch_try_cmpxchg_relaxed */ + +#ifndef arch_try_cmpxchg_acquire +#define arch_try_cmpxchg_acquire(...) \ + __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg_release +#define arch_try_cmpxchg_release(...) \ + __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg +#define arch_try_cmpxchg(...) \ + __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#endif /* arch_try_cmpxchg_relaxed */ + #ifndef arch_atomic_read_acquire static __always_inline int arch_atomic_read_acquire(const atomic_t *v) @@ -2288,4 +2358,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 90cd26cfd69d2250303d654955a0cc12620fb91b +// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index fd525c71d676..2a3f55d98be9 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -9,9 +9,9 @@ #include #ifndef xchg_relaxed -#define xchg_relaxed xchg -#define xchg_acquire xchg -#define xchg_release xchg +#define xchg_acquire xchg +#define xchg_release xchg +#define xchg_relaxed xchg #else /* xchg_relaxed */ #ifndef xchg_acquire @@ -32,9 +32,9 @@ #endif /* xchg_relaxed */ #ifndef cmpxchg_relaxed -#define cmpxchg_relaxed cmpxchg -#define cmpxchg_acquire cmpxchg -#define cmpxchg_release cmpxchg +#define cmpxchg_acquire cmpxchg +#define cmpxchg_release cmpxchg +#define cmpxchg_relaxed cmpxchg #else /* cmpxchg_relaxed */ #ifndef cmpxchg_acquire @@ -55,9 +55,9 @@ #endif /* cmpxchg_relaxed */ #ifndef cmpxchg64_relaxed -#define cmpxchg64_relaxed cmpxchg64 -#define cmpxchg64_acquire cmpxchg64 -#define cmpxchg64_release cmpxchg64 +#define cmpxchg64_acquire cmpxchg64 +#define cmpxchg64_release cmpxchg64 +#define cmpxchg64_relaxed cmpxchg64 #else /* cmpxchg64_relaxed */ #ifndef cmpxchg64_acquire @@ -77,6 +77,76 @@ #endif /* cmpxchg64_relaxed */ +#ifndef try_cmpxchg_relaxed +#ifdef try_cmpxchg +#define try_cmpxchg_acquire try_cmpxchg +#define try_cmpxchg_release try_cmpxchg +#define try_cmpxchg_relaxed try_cmpxchg +#endif /* try_cmpxchg */ + +#ifndef try_cmpxchg +#define try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg */ + +#ifndef try_cmpxchg_acquire +#define try_cmpxchg_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_acquire */ + +#ifndef try_cmpxchg_release +#define try_cmpxchg_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_release */ + +#ifndef try_cmpxchg_relaxed +#define try_cmpxchg_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_relaxed */ + +#else /* try_cmpxchg_relaxed */ + +#ifndef try_cmpxchg_acquire +#define try_cmpxchg_acquire(...) \ + __atomic_op_acquire(try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef try_cmpxchg_release +#define try_cmpxchg_release(...) \ + __atomic_op_release(try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef try_cmpxchg +#define try_cmpxchg(...) \ + __atomic_op_fence(try_cmpxchg, __VA_ARGS__) +#endif + +#endif /* try_cmpxchg_relaxed */ + #define arch_atomic_read atomic_read #define arch_atomic_read_acquire atomic_read_acquire @@ -2522,4 +2592,4 @@ atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f +// d78e6c293c661c15188f0ec05bce45188c8d5892 -- cgit v1.2.3 From e563604a5f5a891283b6a8db4001cee833a7c6b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:46 +0900 Subject: freelist: Implement lockless freelist A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly speedy under low contention. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870622579.1229682.16729440870040944993.stgit@devnote2 --- include/linux/freelist.h | 129 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 include/linux/freelist.h (limited to 'include/linux') diff --git a/include/linux/freelist.h b/include/linux/freelist.h new file mode 100644 index 000000000000..fc1842b96469 --- /dev/null +++ b/include/linux/freelist.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +#ifndef FREELIST_H +#define FREELIST_H + +#include + +/* + * Copyright: cameron@moodycamel.com + * + * A simple CAS-based lock-free free list. Not the fastest thing in the world + * under heavy contention, but simple and correct (assuming nodes are never + * freed until after the free list is destroyed), and fairly speedy under low + * contention. + * + * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists + */ + +struct freelist_node { + atomic_t refs; + struct freelist_node *next; +}; + +struct freelist_head { + struct freelist_node *head; +}; + +#define REFS_ON_FREELIST 0x80000000 +#define REFS_MASK 0x7FFFFFFF + +static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list) +{ + /* + * Since the refcount is zero, and nobody can increase it once it's + * zero (except us, and we run only one copy of this method per node at + * a time, i.e. the single thread case), then we know we can safely + * change the next pointer of the node; however, once the refcount is + * back above zero, then other threads could increase it (happens under + * heavy contention, when the refcount goes to zero in between a load + * and a refcount increment of a node in try_get, then back up to + * something non-zero, then the refcount increment is done by the other + * thread) -- so if the CAS to add the node to the actual list fails, + * decrese the refcount and leave the add operation to the next thread + * who puts the refcount back to zero (which could be us, hence the + * loop). + */ + struct freelist_node *head = READ_ONCE(list->head); + + for (;;) { + WRITE_ONCE(node->next, head); + atomic_set_release(&node->refs, 1); + + if (!try_cmpxchg_release(&list->head, &head, node)) { + /* + * Hmm, the add failed, but we can only try again when + * the refcount goes back to zero. + */ + if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1) + continue; + } + return; + } +} + +static inline void freelist_add(struct freelist_node *node, struct freelist_head *list) +{ + /* + * We know that the should-be-on-freelist bit is 0 at this point, so + * it's safe to set it using a fetch_add. + */ + if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) { + /* + * Oh look! We were the last ones referencing this node, and we + * know we want to add it to the free list, so let's do it! + */ + __freelist_add(node, list); + } +} + +static inline struct freelist_node *freelist_try_get(struct freelist_head *list) +{ + struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head); + unsigned int refs; + + while (head) { + prev = head; + refs = atomic_read(&head->refs); + if ((refs & REFS_MASK) == 0 || + !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) { + head = smp_load_acquire(&list->head); + continue; + } + + /* + * Good, reference count has been incremented (it wasn't at + * zero), which means we can read the next and not worry about + * it changing between now and the time we do the CAS. + */ + next = READ_ONCE(head->next); + if (try_cmpxchg_acquire(&list->head, &head, next)) { + /* + * Yay, got the node. This means it was on the list, + * which means should-be-on-freelist must be false no + * matter the refcount (because nobody else knows it's + * been taken off yet, it can't have been put back on). + */ + WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST); + + /* + * Decrease refcount twice, once for our ref, and once + * for the list's ref. + */ + atomic_fetch_add(-2, &head->refs); + + return head; + } + + /* + * OK, the head must have changed on us, but we still need to decrement + * the refcount we increased. + */ + refs = atomic_fetch_add(-1, &prev->refs); + if (refs == REFS_ON_FREELIST + 1) + __freelist_add(prev, list); + } + + return NULL; +} + +#endif /* FREELIST_H */ -- cgit v1.2.3 From 6e426e0fcd20ce144bb93e00b70df51e9f2e08c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:56 +0900 Subject: kprobes: Replace rp->free_instance with freelist Gets rid of rp->lock, and as a result kretprobes are now fully lockless. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870623583.1229682.17472357584134058687.stgit@devnote2 --- include/linux/kprobes.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 00cf4421efd5..b7824e3f1ef5 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #ifdef CONFIG_KPROBES @@ -157,17 +158,16 @@ struct kretprobe { int maxactive; int nmissed; size_t data_size; - struct hlist_head free_instances; + struct freelist_head freelist; struct kretprobe_holder *rph; - raw_spinlock_t lock; }; struct kretprobe_instance { union { - struct llist_node llist; - struct hlist_node hlist; + struct freelist_node freelist; struct rcu_head rcu; }; + struct llist_node llist; struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; void *fp; -- cgit v1.2.3 From 8bca49e43fb5994bd2a03f3f114cf89bc6f87a14 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 15:51:55 +0300 Subject: drm: shmobile: Reduce include dependencies This file doesn't need anything provided by . All it needs are some types, which are provided by . Drop unneeded completely. Signed-off-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200422125201.37618-1-andriy.shevchenko@linux.intel.com --- include/linux/platform_data/shmob_drm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index fe815d7d9f58..d661399b217d 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,8 +10,6 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include - #include enum shmob_drm_clk_source { -- cgit v1.2.3 From ac80cd17a615e472e3dcff0a5446a58540e64e6d Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Wed, 14 Oct 2020 09:16:01 -0700 Subject: dma-buf: Clarify that dma-buf sg lists are page aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dma-buf API have been used under the assumption that the sg lists returned from dma_buf_map_attachment() are fully page aligned. Lots of stuff can break otherwise all over the place. Clarify this in the documentation and add a check when DMA API debug is enabled. Signed-off-by: Jianxin Xiong Reviewed-by: Christian König Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1602692161-107096-1-git-send-email-jianxin.xiong@intel.com --- include/linux/dma-buf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index cf77cc15f4ba..03875eaed51a 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -146,7 +146,8 @@ struct dma_buf_ops { * * A &sg_table scatter list of or the backing storage of the DMA buffer, * already mapped into the device address space of the &device attached - * with the provided &dma_buf_attachment. + * with the provided &dma_buf_attachment. The addresses and lengths in + * the scatter list are PAGE_SIZE aligned. * * On failure, returns a negative error value wrapped into a pointer. * May also return -EINTR when a signal was received while being -- cgit v1.2.3 From 44cdc1d952e3f7aa9944c1bbf38fc23f49885017 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Sep 2020 11:18:30 -0400 Subject: convert ->f_ep_links/->fllink to hlist we don't care about the order of elements there Signed-off-by: Al Viro --- include/linux/eventpoll.h | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 8f000fada5a4..4e215ccfa792 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,7 +25,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { - INIT_LIST_HEAD(&file->f_ep_links); + INIT_HLIST_HEAD(&file->f_ep_links); INIT_LIST_HEAD(&file->f_tfile_llink); } @@ -50,7 +50,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(list_empty(&file->f_ep_links))) + if (likely(hlist_empty(&file->f_ep_links))) return; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..b484ba0e474a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -946,7 +946,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct list_head f_ep_links; + struct hlist_head f_ep_links; struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -- cgit v1.2.3 From 319c15174757aaedacc89a6e55c965416f130e64 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Oct 2020 20:45:51 -0400 Subject: epoll: take epitem list out of struct file Move the head of epitem list out of struct file; for epoll ones it's moved into struct eventpoll (->refs there), for non-epoll - into the new object (struct epitem_head). In place of ->f_ep_links we leave a pointer to the list head (->f_ep). ->f_ep is protected by ->f_lock and it's zeroed as soon as the list of epitems becomes empty (that can happen only in ep_remove() by now). The list of files for reverse path check is *not* going through struct file now - it's a single-linked list going through epitem_head instances. It's terminated by ERR_PTR(-1) (== EP_UNACTIVE_POINTER), so the elements of list can be distinguished by head->next != NULL. epitem_head instances are allocated at ep_insert() time (by attach_epitem()) and freed either by ep_remove() (if it empties the set of epitems *and* epitem_head does not belong to the reverse path check list) or by clear_tfile_check_list() when the list is emptied (if the set of epitems is empty by that point). Allocations are done from a separate slab - minimal kmalloc() size is too large on some architectures. As the result, we trim struct file _and_ get rid of the games with temporary file references. Locking and barriers are interesting (aren't they always); see unlist_file() and ep_remove() for details. The non-obvious part is that ep_remove() needs to decide if it will be the one to free the damn thing *before* actually storing NULL to head->epitems.first - that's what smp_load_acquire is for in there. unlist_file() lockless path is safe, since we hit it only if we observe NULL in head->epitems.first and whoever had done that store is guaranteed to have observed non-NULL in head->next. IOW, their last access had been the store of NULL into ->epitems.first and we can safely free the sucker. OTOH, we are under rcu_read_lock() and both epitem and epitem->file have their freeing RCU-delayed. So if we see non-NULL ->epitems.first, we can grab ->f_lock (all epitems in there share the same struct file) and safely recheck the emptiness of ->epitems; again, ->next is still non-NULL, so ep_remove() couldn't have freed head yet. ->f_lock serializes us wrt ep_remove(); the rest is trivial. Note that once head->epitems becomes NULL, nothing can get inserted into it - the only remaining reference to head after that point is from the reverse path check list. Signed-off-by: Al Viro --- include/linux/eventpoll.h | 11 +---------- include/linux/fs.h | 5 ++--- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 4e215ccfa792..0350393465d4 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -22,14 +22,6 @@ struct file; struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); #endif -/* Used to initialize the epoll bits inside the "struct file" */ -static inline void eventpoll_init_file(struct file *file) -{ - INIT_HLIST_HEAD(&file->f_ep_links); - INIT_LIST_HEAD(&file->f_tfile_llink); -} - - /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); @@ -50,7 +42,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(hlist_empty(&file->f_ep_links))) + if (likely(!file->f_ep)) return; /* @@ -72,7 +64,6 @@ static inline int ep_op_has_event(int op) #else -static inline void eventpoll_init_file(struct file *file) {} static inline void eventpoll_release(struct file *file) {} #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index b484ba0e474a..993c540e3b77 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -923,7 +923,7 @@ struct file { const struct file_operations *f_op; /* - * Protects f_ep_links, f_flags. + * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; @@ -946,8 +946,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head f_ep_links; - struct list_head f_tfile_llink; + struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; -- cgit v1.2.3 From 96ffcdf239de6f9970178bb7d643e16fd9e68ab9 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 20 Oct 2020 15:12:12 +0900 Subject: PM / devfreq: Remove redundant governor_name from struct devfreq The devfreq structure instance contains the governor_name and a governor instance. When need to show the governor name, better to use the name of devfreq_governor structure. So, governor_name variable in struct devfreq is a redundant and unneeded variable. Remove the redundant governor_name of struct devfreq and then use the name of devfreq_governor instance. Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 121a2430d7f7..b6d3bae1c74d 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -15,8 +15,6 @@ #include #include -#define DEVFREQ_NAME_LEN 16 - /* DEVFREQ governor name */ #define DEVFREQ_GOV_SIMPLE_ONDEMAND "simple_ondemand" #define DEVFREQ_GOV_PERFORMANCE "performance" @@ -139,7 +137,6 @@ struct devfreq_stats { * using devfreq. * @profile: device-specific devfreq profile * @governor: method how to choose frequency based on the usage. - * @governor_name: devfreq governor name for use with this devfreq * @nb: notifier block used to notify devfreq object that it should * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. @@ -176,7 +173,6 @@ struct devfreq { struct device dev; struct devfreq_dev_profile *profile; const struct devfreq_governor *governor; - char governor_name[DEVFREQ_NAME_LEN]; struct notifier_block nb; struct delayed_work work; -- cgit v1.2.3 From e275d2109cdaea8b4554b9eb8a828bdb8f8ba068 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 26 Oct 2020 10:08:47 +0200 Subject: bus: ti-sysc: Fix reset status check for modules with quirks Commit d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") started showing a "OCP softreset timed out" warning on enable if the interconnect target module is not out of reset. This caused the warning to be often triggered for i2c and hdq while the devices are working properly. Turns out that some interconnect target modules seem to have an unusable reset status bits unless the module specific reset quirks are activated. Let's just skip the reset status check for those modules as we only want to activate the reset quirks when doing a reset, and not on enable. This way we don't see the bogus "OCP softreset timed out" warnings during boot. Fixes: d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index c59999ce044e..240dce553a0b 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) #define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) -- cgit v1.2.3 From bc3d7bf61a9eaecccc84dc2ecc2a9a3fa4f5ec47 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Sat, 3 Oct 2020 23:25:31 -0400 Subject: elf: Expose ELF header in compat_start_thread() Like it is done for SET_PERSONALITY with x86, which requires the ELF header to select correct personality parameters, x86 requires the headers on compat_start_thread() to choose starting CS for ELF32 binaries, instead of relying on the going-away TIF_IA32/X32 flags. Add an indirection macro to ELF invocations of START_THREAD, that x86 can reimplement to receive the extra parameter just for ELF files. This requires no changes to other architectures who don't need the header information, they can continue to use the original start_thread for ELF and non-ELF binaries, and it prevents affecting non-ELF code paths for x86. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201004032536.1229030-6-krisman@collabora.com --- include/linux/elf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 5d5b0321da0b..6dbcfe7a3fd7 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -22,6 +22,11 @@ SET_PERSONALITY(ex) #endif +#ifndef START_THREAD +#define START_THREAD(elf_ex, regs, elf_entry, start_stack) \ + start_thread(regs, elf_entry, start_stack) +#endif + #define ELF32_GNU_PROPERTY_ALIGN 4 #define ELF64_GNU_PROPERTY_ALIGN 8 -- cgit v1.2.3 From 9a29a671902c2be05d636045a4dd365219ca716c Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Sat, 3 Oct 2020 23:25:33 -0400 Subject: elf: Expose ELF header on arch_setup_additional_pages() Like it is done for SET_PERSONALITY with ARM, which requires the ELF header to select correct personality parameters, x86 requires the headers when selecting which VDSO to load, instead of relying on the going-away TIF_IA32/X32 flags. Add an indirection macro to arch_setup_additional_pages(), that x86 can reimplement to receive the extra parameter just for ELF files. This requires no changes to other architectures, who can continue to use the original arch_setup_additional_pages for ELF and non-ELF binaries. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201004032536.1229030-8-krisman@collabora.com --- include/linux/elf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 6dbcfe7a3fd7..c9a46c4e183b 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -27,6 +27,11 @@ start_thread(regs, elf_entry, start_stack) #endif +#if defined(ARCH_HAS_SETUP_ADDITIONAL_PAGES) && !defined(ARCH_SETUP_ADDITIONAL_PAGES) +#define ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ + arch_setup_additional_pages(bprm, interpreter) +#endif + #define ELF32_GNU_PROPERTY_ALIGN 4 #define ELF64_GNU_PROPERTY_ALIGN 8 -- cgit v1.2.3 From ab589bac553f79d559952aa088480a72258ac5bc Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 19 Oct 2020 13:53:13 +0300 Subject: ASoC: adau1977: remove platform data and move micbias bindings include The change removes the platform_data include/definition. It only contains some values for the MICBIAS. These are moved into 'dt-bindings/sound/adi,adau1977.h' so that they can be used inside device-trees. When moving then, they need to be converted to pre-compiler defines, so that the DT compiler can understand them. The driver then, also needs to include the new 'dt-bindings/sound/adi,adau1977.h' file. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201019105313.24862-1-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/platform_data/adau1977.h | 44 ---------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 include/linux/platform_data/adau1977.h (limited to 'include/linux') diff --git a/include/linux/platform_data/adau1977.h b/include/linux/platform_data/adau1977.h deleted file mode 100644 index 86667235077a..000000000000 --- a/include/linux/platform_data/adau1977.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ADAU1977/ADAU1978/ADAU1979 driver - * - * Copyright 2014 Analog Devices Inc. - * Author: Lars-Peter Clausen - */ - -#ifndef __LINUX_PLATFORM_DATA_ADAU1977_H__ -#define __LINUX_PLATFORM_DATA_ADAU1977_H__ - -/** - * enum adau1977_micbias - ADAU1977 MICBIAS pin voltage setting - * @ADAU1977_MICBIAS_5V0: MICBIAS is set to 5.0 V - * @ADAU1977_MICBIAS_5V5: MICBIAS is set to 5.5 V - * @ADAU1977_MICBIAS_6V0: MICBIAS is set to 6.0 V - * @ADAU1977_MICBIAS_6V5: MICBIAS is set to 6.5 V - * @ADAU1977_MICBIAS_7V0: MICBIAS is set to 7.0 V - * @ADAU1977_MICBIAS_7V5: MICBIAS is set to 7.5 V - * @ADAU1977_MICBIAS_8V0: MICBIAS is set to 8.0 V - * @ADAU1977_MICBIAS_8V5: MICBIAS is set to 8.5 V - * @ADAU1977_MICBIAS_9V0: MICBIAS is set to 9.0 V - */ -enum adau1977_micbias { - ADAU1977_MICBIAS_5V0 = 0x0, - ADAU1977_MICBIAS_5V5 = 0x1, - ADAU1977_MICBIAS_6V0 = 0x2, - ADAU1977_MICBIAS_6V5 = 0x3, - ADAU1977_MICBIAS_7V0 = 0x4, - ADAU1977_MICBIAS_7V5 = 0x5, - ADAU1977_MICBIAS_8V0 = 0x6, - ADAU1977_MICBIAS_8V5 = 0x7, - ADAU1977_MICBIAS_9V0 = 0x8, -}; - -/** - * struct adau1977_platform_data - Platform configuration data for the ADAU1977 - * @micbias: Specifies the voltage for the MICBIAS pin - */ -struct adau1977_platform_data { - enum adau1977_micbias micbias; -}; - -#endif -- cgit v1.2.3 From 6e1e90ec027509a7e8d4efbd77a65b32b5a8b3ec Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Wed, 14 Oct 2020 23:30:24 +0300 Subject: regmap: mmio: add config option to allow relaxed MMIO accesses On some platforms (eg armv7 due to the CONFIG_ARM_DMA_MEM_BUFFERABLE) MMIO R/W operations always add memory barriers which can increase load, decrease battery life or in general reduce performance unnecessarily on devices which access a lot of configuration registers and where ordering does not matter (eg. media accelerators like the Verisilicon / Hantro video decoders). Drivers used to call the relaxed MMIO variants directly but since they are now accessing the MMIO registers via regmaps (to compensate for different VPU HW reg layouts via regmap fields), there is a need for a relaxed API / config to preserve existing behaviour. Cc: Mark Brown Signed-off-by: Adrian Ratiu Link: https://lore.kernel.org/r/20201014203024.954369-1-adrian.ratiu@collabora.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e7834d98207f..126fe700d1d8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -315,6 +315,10 @@ typedef void (*regmap_unlock)(void *); * masks are used. * @zero_flag_mask: If set, read_flag_mask and write_flag_mask are used even * if they are both empty. + * @use_relaxed_mmio: If set, MMIO R/W operations will not use memory barriers. + * This can avoid load on devices which don't require strict + * orderings, but drivers should carefully add any explicit + * memory barriers when they may require them. * @use_single_read: If set, converts the bulk read operation into a series of * single read operations. This is useful for a device that * does not support bulk read. @@ -388,6 +392,7 @@ struct regmap_config { bool use_single_read; bool use_single_write; + bool use_relaxed_mmio; bool can_multi_write; enum regmap_endian reg_format_endian; -- cgit v1.2.3 From ab7cffb8d2367e5b088c7c14452724e719a10eba Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 26 Oct 2020 20:20:40 +0100 Subject: MIPS: ingenic: remove unused platform_data header file There are no users of this headers file in the kernel. All users are likely migrated to device tree which is a good thing. Signed-off-by: Sam Ravnborg Cc: Thomas Bogendoerfer Cc: Paul Cercueil Cc: Harvey Hunt Cc: linux-mips@vger.kernel.org Reviewed-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- include/linux/platform_data/jz4740/jz4740_nand.h | 25 ------------------------ 1 file changed, 25 deletions(-) delete mode 100644 include/linux/platform_data/jz4740/jz4740_nand.h (limited to 'include/linux') diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h deleted file mode 100644 index b3f066d63059..000000000000 --- a/include/linux/platform_data/jz4740/jz4740_nand.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen - * JZ4740 SoC NAND controller driver - */ - -#ifndef __JZ4740_NAND_H__ -#define __JZ4740_NAND_H__ - -#include -#include - -#define JZ_NAND_NUM_BANKS 4 - -struct jz_nand_platform_data { - int num_partitions; - struct mtd_partition *partitions; - - unsigned char banks[JZ_NAND_NUM_BANKS]; - - void (*ident_callback)(struct platform_device *, struct mtd_info *, - struct mtd_partition **, int *num_partitions); -}; - -#endif -- cgit v1.2.3 From 343a3e8bc635bd4c58d45a4fe67f9c3a78fbd191 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:20:50 +0100 Subject: bpf: Fix -Wshadow warnings There are thousands of warnings about one macro in a W=2 build: include/linux/filter.h:561:6: warning: declaration of 'ret' shadows a previous local [-Wshadow] Prefix all the locals in that macro with __ to avoid most of these warnings. Fixes: 492ecee892c2 ("bpf: enable program stats") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201026162110.3710415-1-arnd@kernel.org --- include/linux/filter.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..1b62397bd124 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -558,21 +558,21 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ - u32 ret; \ + u32 __ret; \ cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ + struct bpf_prog_stats *__stats; \ + u64 __start = sched_clock(); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&__stats->syncp); \ + __stats->cnt++; \ + __stats->nsecs += sched_clock() - __start; \ + u64_stats_update_end(&__stats->syncp); \ } else { \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ } \ - ret; }) + __ret; }) #define BPF_PROG_RUN(prog, ctx) \ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) -- cgit v1.2.3 From 6d915476e67d99b73a57bceb83cff1cf153d8bf6 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Sep 2020 08:44:50 -0400 Subject: audit: trigger accompanying records when no rules present When there are no audit rules registered, mandatory records (config, etc.) are missing their accompanying records (syscall, proctitle, etc.). This is due to audit context dummy set on syscall entry based on absence of rules that signals that no other records are to be printed. Clear the dummy bit if any record is generated, open coding this in audit_log_start(). The proctitle context and dummy checks are pointless since the proctitle record will not be printed if no syscall records are printed. The fds array is reset to -1 after the first syscall to indicate it isn't valid any more, but was never set to -1 when the context was allocated to indicate it wasn't yet valid. Check ctx->pwd in audit_log_name(). The audit_inode* functions can be called without going through getname_flags() or getname_kernel() that sets audit_names and cwd, so set the cwd in audit_alloc_name() if it has not already been done so due to audit_names being valid and purge all other audit_getcwd() calls. Revert the LSM dump_common_audit_data() LSM_AUDIT_DATA_* cases from the ghak96 patch since they are no longer necessary due to cwd coverage in audit_alloc_name(). Thanks to bauen1 for reporting LSM situations in which context->cwd is not valid, inadvertantly fixed by the ghak96 patch. Please see upstream github issue https://github.com/linux-audit/audit-kernel/issues/120 This is also related to upstream github issue https://github.com/linux-audit/audit-kernel/issues/96 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b3d859831a31..82b7c1116a85 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -292,7 +292,6 @@ extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); -extern void __audit_getcwd(void); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); @@ -351,11 +350,6 @@ static inline void audit_getname(struct filename *name) if (unlikely(!audit_dummy_context())) __audit_getname(name); } -static inline void audit_getcwd(void) -{ - if (unlikely(audit_context())) - __audit_getcwd(); -} static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { @@ -584,8 +578,6 @@ static inline struct filename *audit_reusename(const __user char *name) } static inline void audit_getname(struct filename *name) { } -static inline void audit_getcwd(void) -{ } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) -- cgit v1.2.3 From 95de5094f5ac50b6f355f4e7dffcb6f34bd5dada Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 22 Sep 2020 18:24:29 +0800 Subject: firmware: imx: add dummy functions add dummy functions to avoid build failure when header files are included, but drivers are not built. Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/ipc.h | 13 +++++++++++++ include/linux/firmware/imx/sci.h | 27 +++++++++++++++++++++++++++ include/linux/firmware/imx/svc/misc.h | 19 +++++++++++++++++++ 3 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/ipc.h b/include/linux/firmware/imx/ipc.h index 891057434858..0b4643571625 100644 --- a/include/linux/firmware/imx/ipc.h +++ b/include/linux/firmware/imx/ipc.h @@ -34,6 +34,7 @@ struct imx_sc_rpc_msg { uint8_t func; }; +#ifdef CONFIG_IMX_SCU /* * This is an function to send an RPC message over an IPC channel. * It is called by client-side SCFW API function shims. @@ -55,4 +56,16 @@ int imx_scu_call_rpc(struct imx_sc_ipc *ipc, void *msg, bool have_resp); * @return Returns an error code (0 = success, failed if < 0) */ int imx_scu_get_handle(struct imx_sc_ipc **ipc); +#else +static inline int imx_scu_call_rpc(struct imx_sc_ipc *ipc, void *msg, + bool have_resp) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_get_handle(struct imx_sc_ipc **ipc) +{ + return -ENOTSUPP; +} +#endif #endif /* _SC_IPC_H */ diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index 22c76571a294..5cc63fe7e84d 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -16,9 +16,36 @@ #include #include +#if IS_ENABLED(CONFIG_IMX_SCU) int imx_scu_enable_general_irq_channel(struct device *dev); int imx_scu_irq_register_notifier(struct notifier_block *nb); int imx_scu_irq_unregister_notifier(struct notifier_block *nb); int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable); int imx_scu_soc_init(struct device *dev); +#else +static inline int imx_scu_soc_init(struct device *dev) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_enable_general_irq_channel(struct device *dev) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_irq_register_notifier(struct notifier_block *nb) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_irq_unregister_notifier(struct notifier_block *nb) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable) +{ + return -ENOTSUPP; +} +#endif #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/imx/svc/misc.h b/include/linux/firmware/imx/svc/misc.h index 031dd4d3c766..760db08a67fc 100644 --- a/include/linux/firmware/imx/svc/misc.h +++ b/include/linux/firmware/imx/svc/misc.h @@ -46,6 +46,7 @@ enum imx_misc_func { * Control Functions */ +#ifdef CONFIG_IMX_SCU int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource, u8 ctrl, u32 val); @@ -54,5 +55,23 @@ int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource, int imx_sc_pm_cpu_start(struct imx_sc_ipc *ipc, u32 resource, bool enable, u64 phys_addr); +#else +static inline int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, + u32 resource, u8 ctrl, u32 val) +{ + return -ENOTSUPP; +} +static inline int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, + u32 resource, u8 ctrl, u32 *val) +{ + return -ENOTSUPP; +} + +static inline int imx_sc_pm_cpu_start(struct imx_sc_ipc *ipc, u32 resource, + bool enable, u64 phys_addr) +{ + return -ENOTSUPP; +} +#endif #endif /* _SC_MISC_API_H */ -- cgit v1.2.3 From ea856ec266c1e6aecd2b107032d5b5d661f0686d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C4=8Cavoj?= Date: Wed, 21 Oct 2020 00:09:44 +0200 Subject: platform/x86: asus-wmi: Add support for SW_TABLET_MODE on UX360 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UX360CA has a WMI device id 0x00060062, which reports whether the lid is flipped in tablet mode (1) or in normal laptop mode (0). Add a quirk (quirk_asus_use_lid_flip_devid) for devices on which this WMI device should be used to figure out the SW_TABLET_MODE state, as opposed to the quirk_asus_use_kbd_dock_devid. Additionally, the device needs to be queried on resume and restore because the firmware does not generate an event if the laptop is put to sleep while in tablet mode, flipped to normal mode, and later awoken. It is assumed other UX360* models have the same WMI device. As such, the quirk is applied to devices with DMI_MATCH(DMI_PRODUCT_NAME, "UX360"). More devices with this feature need to be tested and added accordingly. The reason for using an allowlist via the quirk mechanism is that the new WMI device (0x00060062) is also present on some models which do not have a 360 degree hinge (at least FX503VD and GL503VD from Hans' DSTS collection) and therefore its presence cannot be relied on. Signed-off-by: Samuel Čavoj Cc: Hans de Goede Link: https://lore.kernel.org/r/20201020220944.1075530-1-samuel@cavoj.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 897b8332a39f..2f274cf52805 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -62,6 +62,7 @@ /* Misc */ #define ASUS_WMI_DEVID_CAMERA 0x00060013 +#define ASUS_WMI_DEVID_LID_FLIP 0x00060062 /* Storage */ #define ASUS_WMI_DEVID_CARDREADER 0x00080013 -- cgit v1.2.3 From da31de35cd2fb78f75788873e0b61e56d4bb1a90 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sat, 10 Oct 2020 02:47:49 +0200 Subject: tty: goldfish: use __raw_writel()/__raw_readl() gf_early_console_putchar() uses __raw_writel() but the standard driver uses writel()/readl(). This means we can't use both on the same machine as the device is either big-endian, little-endian or native-endian. As android implementation defines the endianness of the device is the one of the architecture replace all writel()/readl() by __raw_writel()/__raw_readl() https://android.googlesource.com/platform/external/qemu/+/refs/heads/emu-master-dev/hw/char/goldfish_tty.c#222 Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20201010004749.1201695-1-laurent@vivier.eu Signed-off-by: Greg Kroah-Hartman --- include/linux/goldfish.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h index 265a099cd3b8..12be1601fd84 100644 --- a/include/linux/goldfish.h +++ b/include/linux/goldfish.h @@ -13,9 +13,9 @@ static inline void gf_write_ptr(const void *ptr, void __iomem *portl, { const unsigned long addr = (unsigned long)ptr; - writel(lower_32_bits(addr), portl); + __raw_writel(lower_32_bits(addr), portl); #ifdef CONFIG_64BIT - writel(upper_32_bits(addr), porth); + __raw_writel(upper_32_bits(addr), porth); #endif } @@ -23,9 +23,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr, void __iomem *portl, void __iomem *porth) { - writel(lower_32_bits(addr), portl); + __raw_writel(lower_32_bits(addr), portl); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - writel(upper_32_bits(addr), porth); + __raw_writel(upper_32_bits(addr), porth); #endif } -- cgit v1.2.3 From caabdd0f59a9771ed095efe3ad5a08867b976ab2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Oct 2020 09:35:39 +0200 Subject: ctype.h: remove duplicate isdigit() helper gcc warns a few thousand times about the isdigit() shadow: include/linux/ctype.h:26:19: warning: declaration of 'isdigit' shadows a built-in function [-Wshadow] As there is already a compiler builtin, just use that, and make it clear we do that by defining a macro. Unfortunately, clang does not have the isdigit() builtin, so this has to be conditional. Signed-off-by: Arnd Bergmann --- include/linux/compiler_types.h | 11 +++++++++++ include/linux/ctype.h | 15 +++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..5d41dff3f79c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -64,6 +64,17 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Attributes */ #include +/* Builtins */ + +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + /* Compiler specific macros. */ #ifdef __clang__ #include diff --git a/include/linux/ctype.h b/include/linux/ctype.h index 363b004426db..bc95aef2219c 100644 --- a/include/linux/ctype.h +++ b/include/linux/ctype.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CTYPE_H #define _LINUX_CTYPE_H +#include + /* * NOTE! This ctype does not handle EOF like the standard C * library is required to. @@ -23,10 +25,6 @@ extern const unsigned char _ctype[]; #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) -static inline int isdigit(int c) -{ - return '0' <= c && c <= '9'; -} #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) @@ -39,6 +37,15 @@ static inline int isdigit(int c) #define isascii(c) (((unsigned char)(c))<=0x7f) #define toascii(c) (((unsigned char)(c))&0x7f) +#if __has_builtin(__builtin_isdigit) +#define isdigit(c) __builtin_isdigit(c) +#else +static inline int isdigit(int c) +{ + return '0' <= c && c <= '9'; +} +#endif + static inline unsigned char __tolower(unsigned char c) { if (isupper(c)) -- cgit v1.2.3 From 39613eaad3ceff320da344427a70c655e783475e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 28 Oct 2020 12:16:10 +0530 Subject: qcom-geni-se: remove has_opp_table has_opp_table isn't used anymore, remove it. Signed-off-by: Viresh Kumar Link: https://lore.kernel.org/r/08ec1ee1d4252a266956abb5f1e0e0026d753564.1603867487.git.viresh.kumar@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/qcom-geni-se.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index f7bbea3f09ca..ec2ad4b0fe14 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -48,7 +48,6 @@ struct geni_icc_path { * @clk_perf_tbl: Table of clock frequency input to serial engine clock * @icc_paths: Array of ICC paths for SE * @opp_table: Pointer to the OPP table - * @has_opp_table: Specifies if the SE has an OPP table */ struct geni_se { void __iomem *base; @@ -59,7 +58,6 @@ struct geni_se { unsigned long *clk_perf_tbl; struct geni_icc_path icc_paths[3]; struct opp_table *opp_table; - bool has_opp_table; }; /* Common SE registers */ -- cgit v1.2.3 From f1f37abbe6fc2b1242f78157db76e48dbf9518ee Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 19 Oct 2020 15:40:46 +0200 Subject: gpio: Retire the explicit gpio irqchip code Now that all gpiolib irqchip users have been over to use the irqchip template, we can finally retire the old code path and leave just one way in to the irqchip: set up the template when registering the gpio_chip. For a while we had two code paths for this which was a bit confusing. This brings this work to a conclusion, there is now one way of doing this. Signed-off-by: Linus Walleij Reviewed-by: Andy Shevchenko Cc: Thierry Reding Link: https://lore.kernel.org/r/20201019134046.65101-1-linus.walleij@linaro.org --- include/linux/gpio/driver.h | 71 --------------------------------------------- 1 file changed, 71 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4a7e295c3640..286de0520574 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -621,83 +621,12 @@ int gpiochip_irq_domain_activate(struct irq_domain *domain, void gpiochip_irq_domain_deactivate(struct irq_domain *domain, struct irq_data *data); -void gpiochip_set_nested_irqchip(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int parent_irq); - -int gpiochip_irqchip_add_key(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type, - bool threaded, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); - bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset); int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); -#ifdef CONFIG_LOCKDEP - -/* - * Lockdep requires that each irqchip instance be created with a - * unique key so as to avoid unnecessary warnings. This upfront - * boilerplate static inlines provides such a key for each - * unique instance. - */ -static inline int gpiochip_irqchip_add(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - static struct lock_class_key lock_key; - static struct lock_class_key request_key; - - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, false, - &lock_key, &request_key); -} - -static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - - static struct lock_class_key lock_key; - static struct lock_class_key request_key; - - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, true, - &lock_key, &request_key); -} -#else /* ! CONFIG_LOCKDEP */ -static inline int gpiochip_irqchip_add(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, false, NULL, NULL); -} - -static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, true, NULL, NULL); -} -#endif /* CONFIG_LOCKDEP */ - int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset); void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, -- cgit v1.2.3 From 6a6223ec7779dfdabb9c2567bb42079bc300cf27 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:13 +0100 Subject: blk-mq: docs: add kernel-doc description for a new struct member As reported by kernel-doc: ./include/linux/blk-mq.h:267: warning: Function parameter or member 'active_queues_shared_sbitmap' not described in 'blk_mq_tag_set' There is now a new member for struct blk_mq_tag_set. Add a description for it, based on the commit that introduced it. Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jens Axboe Reviewed-by: John Garry Link: https://lore.kernel.org/r/8e513153b83eefc05e358f51f2632b592c3f6772.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d677..794b2a33a2c3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -235,6 +235,8 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @active_queues_shared_sbitmap: + * number of active request queues per tag set. * @__bitmap_tags: A shared tags sbitmap, used over all hctx's * @__breserved_tags: * A shared reserved tags sbitmap, used over all hctx's -- cgit v1.2.3 From 89b422354409c275e898d26607201797cc05a932 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:17 +0100 Subject: mm: pagemap.h: fix two kernel-doc markups Changeset a8cf7f272b5a ("mm: add find_lock_head") renamed the index parameter, but forgot to update the kernel-doc markups accordingly. Fixes: a8cf7f272b5a ("mm: add find_lock_head") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/dce89b296a4f5f9f8f798d5e76b6736c14a916ac.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c77b7c31b2e4..e1e19c1f9ec9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -344,9 +344,9 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, /** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search - * @offset: the page index + * @index: the page index * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, it is returned locked and with an increased * refcount. * @@ -363,9 +363,9 @@ static inline struct page *find_lock_page(struct address_space *mapping, /** * find_lock_head - Locate, pin and lock a pagecache page. * @mapping: The address_space to search. - * @offset: The page index. + * @index: The page index. * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, its head page is returned locked and with an increased * refcount. * -- cgit v1.2.3 From e86c6569c588a01f20e7554cc245f8fae831957b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:18 +0100 Subject: net: phy: remove kernel-doc duplication Sphinx 3 now checks for duplicated function declarations: .../Documentation/networking/kapi:143: ../include/linux/phy.h:163: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'unsigned int phy_supported_speeds (struct phy_device *phy, unsigned int *speeds, unsigned int size)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1034: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1076: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1088: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1100: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. It turns that both the C and the H files have the same kernel-doc markup for the same functions. Let's drop the at the header file, keeping the one closer to the code. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/75e9a357f9a716833d2094b04898754876365e68.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/phy.h | 40 +++++----------------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index eb3cb1a98b45..56563e5e0dc7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -147,16 +147,8 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; -/** +/* * phy_supported_speeds - return all speeds currently supported by a PHY device - * @phy: The PHY device to return supported speeds of. - * @speeds: buffer to store supported speeds in. - * @size: size of speeds buffer. - * - * Description: Returns the number of supported speeds, and fills - * the speeds buffer with the supported speeds. If speeds buffer is - * too small to contain all currently supported speeds, will return as - * many speeds as can fit. */ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int *speeds, @@ -1022,14 +1014,9 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, regnum, mask, set); } -/** +/* * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); @@ -1064,38 +1051,21 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); __ret; \ }) -/** +/* * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for __phy_read(); */ int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); -/** +/* * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); */ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -/** +/* * __phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for __phy_write(); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -- cgit v1.2.3 From cf38cc9f1e71151f22584c40357afaab6609384b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:23 +0100 Subject: locking/refcount: move kernel-doc markups to the proper place Changeset a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") added a set of functions starting with __ that have a new parameter, adding a series of new warnings: $ ./scripts/kernel-doc -none include/linux/refcount.h include/linux/refcount.h:169: warning: Function parameter or member 'oldp' not described in '__refcount_add_not_zero' include/linux/refcount.h:208: warning: Function parameter or member 'oldp' not described in '__refcount_add' include/linux/refcount.h:239: warning: Function parameter or member 'oldp' not described in '__refcount_inc_not_zero' include/linux/refcount.h:261: warning: Function parameter or member 'oldp' not described in '__refcount_inc' include/linux/refcount.h:291: warning: Function parameter or member 'oldp' not described in '__refcount_sub_and_test' include/linux/refcount.h:327: warning: Function parameter or member 'oldp' not described in '__refcount_dec_and_test' include/linux/refcount.h:347: warning: Function parameter or member 'oldp' not described in '__refcount_dec' The issue is that the kernel-doc markups are now misplaced, as they should be added just before the functions. So, move the kernel-doc markups to the proper places, in order to drop the warnings. It should be noticed that git show produces a crappy output, for this patch without "--patience" flag. Fixes: a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7985c31d1ace591bc5e1faa05c367f1295b78afd.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/refcount.h | 130 +++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 7fabb1af18e0..497990c69b0b 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -147,24 +147,6 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -/** - * refcount_add_not_zero - add a value to a refcount unless it is 0 - * @i: the value to add to the refcount - * @r: the refcount - * - * Will saturate at REFCOUNT_SATURATED and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_inc(), or one of its variants, should instead be used to - * increment a reference count. - * - * Return: false if the passed refcount is 0, true otherwise - */ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -183,17 +165,12 @@ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, in return old; } -static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) -{ - return __refcount_add_not_zero(i, r, NULL); -} - /** - * refcount_add - add a value to a refcount + * refcount_add_not_zero - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * - * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * Will saturate at REFCOUNT_SATURATED and WARN. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency @@ -203,7 +180,14 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) * use case in which references are taken and released one at a time. In these * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + return __refcount_add_not_zero(i, r, NULL); +} + static inline void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -217,11 +201,32 @@ static inline void __refcount_add(int i, refcount_t *r, int *oldp) refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ static inline void refcount_add(int i, refcount_t *r) { __refcount_add(i, r, NULL); } +static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero(1, r, oldp); +} + /** * refcount_inc_not_zero - increment a refcount unless it is 0 * @r: the refcount to increment @@ -235,14 +240,14 @@ static inline void refcount_add(int i, refcount_t *r) * * Return: true if the increment was successful, false otherwise */ -static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - return __refcount_add_not_zero(1, r, oldp); + return __refcount_inc_not_zero(r, NULL); } -static inline __must_check bool refcount_inc_not_zero(refcount_t *r) +static inline void __refcount_inc(refcount_t *r, int *oldp) { - return __refcount_inc_not_zero(r, NULL); + __refcount_add(1, r, oldp); } /** @@ -257,14 +262,27 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ -static inline void __refcount_inc(refcount_t *r, int *oldp) +static inline void refcount_inc(refcount_t *r) { - __refcount_add(1, r, oldp); + __refcount_inc(r, NULL); } -static inline void refcount_inc(refcount_t *r) +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { - __refcount_inc(r, NULL); + int old = atomic_fetch_sub_release(i, &r->refs); + + if (oldp) + *oldp = old; + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); + + return false; } /** @@ -287,27 +305,14 @@ static inline void refcount_inc(refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { - int old = atomic_fetch_sub_release(i, &r->refs); - - if (oldp) - *oldp = old; - - if (old == i) { - smp_acquire__after_ctrl_dep(); - return true; - } - - if (unlikely(old < 0 || old - i < 0)) - refcount_warn_saturate(r, REFCOUNT_SUB_UAF); - - return false; + return __refcount_sub_and_test(i, r, NULL); } -static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) { - return __refcount_sub_and_test(i, r, NULL); + return __refcount_sub_and_test(1, r, oldp); } /** @@ -323,26 +328,11 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) -{ - return __refcount_sub_and_test(1, r, oldp); -} - static inline __must_check bool refcount_dec_and_test(refcount_t *r) { return __refcount_dec_and_test(r, NULL); } -/** - * refcount_dec - decrement a refcount - * @r: the refcount - * - * Similar to atomic_dec(), it will WARN on underflow and fail to decrement - * when saturated at REFCOUNT_SATURATED. - * - * Provides release memory ordering, such that prior loads and stores are done - * before. - */ static inline void __refcount_dec(refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(1, &r->refs); @@ -354,6 +344,16 @@ static inline void __refcount_dec(refcount_t *r, int *oldp) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ static inline void refcount_dec(refcount_t *r) { __refcount_dec(r, NULL); -- cgit v1.2.3 From 7cb415003468d41aecd6877ae088c38f6c0fc174 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 28 Oct 2020 06:56:47 -0400 Subject: Fonts: Make font size unsigned in font_desc `width` and `height` are defined as unsigned in our UAPI font descriptor `struct console_font`. Make them unsigned in our kernel font descriptor `struct font_desc`, too. Also, change the corresponding printk() format identifiers from `%d` to `%u`, in sti_select_fbfont(). Signed-off-by: Peilin Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201028105647.1210161-1-yepeilin.cs@gmail.com --- include/linux/font.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/font.h b/include/linux/font.h index 4a3f8741bb7e..b7214d7881f0 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -16,7 +16,7 @@ struct font_desc { int idx; const char *name; - int width, height; + unsigned int width, height; const void *data; int pref; }; -- cgit v1.2.3 From 8073c1ac82c12aaf1b475a3ce5328d43b3eaa4ae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 Oct 2020 22:35:11 +0100 Subject: genirq/msi: Allow shadow declarations of msi_msg:: $member Architectures like x86 have their MSI messages in various bits of the data, address_lo and address_hi field. Composing or decomposing these messages with bitmasks and shifts is possible, but unreadable gunk. Allow architectures to provide an architecture specific representation for each member of msi_msg. Provide empty defaults for each and stick them into an union. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201024213535.443185-12-dwmw2@infradead.org --- include/linux/msi.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 6b584cc4757c..360a0a7e7341 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -4,11 +4,50 @@ #include #include +#include + +/* Dummy shadow structures if an architecture does not define them */ +#ifndef arch_msi_msg_addr_lo +typedef struct arch_msi_msg_addr_lo { + u32 address_lo; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#endif + +#ifndef arch_msi_msg_addr_hi +typedef struct arch_msi_msg_addr_hi { + u32 address_hi; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#endif + +#ifndef arch_msi_msg_data +typedef struct arch_msi_msg_data { + u32 data; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#endif +/** + * msi_msg - Representation of a MSI message + * @address_lo: Low 32 bits of msi message address + * @arch_addrlo: Architecture specific shadow of @address_lo + * @address_hi: High 32 bits of msi message address + * (only used when device supports it) + * @arch_addrhi: Architecture specific shadow of @address_hi + * @data: MSI message data (usually 16 bits) + * @arch_data: Architecture specific shadow of @data + */ struct msi_msg { - u32 address_lo; /* low 32 bits of msi message address */ - u32 address_hi; /* high 32 bits of msi message address */ - u32 data; /* 16 bits of msi message data */ + union { + u32 address_lo; + arch_msi_msg_addr_lo_t arch_addr_lo; + }; + union { + u32 address_hi; + arch_msi_msg_addr_hi_t arch_addr_hi; + }; + union { + u32 data; + arch_msi_msg_data_t arch_data; + }; }; extern int pci_msi_ignore_mask; @@ -243,7 +282,6 @@ struct msi_controller { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN #include -#include struct irq_domain; struct irq_domain_ops; -- cgit v1.2.3 From cf83b2d2e2b64920bd6999b199dfa271d7e94cf8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 27 Oct 2020 23:10:54 -0700 Subject: bpf: Permit cond_resched for some iterators Commit e679654a704e ("bpf: Fix a rcu_sched stall issue with bpf task/task_file iterator") tries to fix rcu stalls warning which is caused by bpf task_file iterator when running "bpftool prog". rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x097-....: (20999 ticks this GP) idle=302/1/0x4000000000000000 softirq=1508852/1508852 fqs=4913 \x09(t=21031 jiffies g=2534773 q=179750) NMI backtrace for cpu 7 CPU: 7 PID: 184195 Comm: bpftool Kdump: loaded Tainted: G W 5.8.0-00004-g68bfc7f8c1b4 #6 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A17 05/03/2019 Call Trace: dump_stack+0x57/0x70 nmi_cpu_backtrace.cold+0x14/0x53 ? lapic_can_unplug_cpu.cold+0x39/0x39 nmi_trigger_cpumask_backtrace+0xb7/0xc7 rcu_dump_cpu_stacks+0xa2/0xd0 rcu_sched_clock_irq.cold+0x1ff/0x3d9 ? tick_nohz_handler+0x100/0x100 update_process_times+0x5b/0x90 tick_sched_timer+0x5e/0xf0 __hrtimer_run_queues+0x12a/0x2a0 hrtimer_interrupt+0x10e/0x280 __sysvec_apic_timer_interrupt+0x51/0xe0 asm_call_on_stack+0xf/0x20 sysvec_apic_timer_interrupt+0x6f/0x80 ... task_file_seq_next+0x52/0xa0 bpf_seq_read+0xb9/0x320 vfs_read+0x9d/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x38/0x60 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The fix is to limit the number of bpf program runs to be one million. This fixed the program in most cases. But we also found under heavy load, which can increase the wallclock time for bpf_seq_read(), the warning may still be possible. For example, calling bpf_delay() in the "while" loop of bpf_seq_read(), which will introduce artificial delay, the warning will show up in my qemu run. static unsigned q; volatile unsigned *p = &q; volatile unsigned long long ll; static void bpf_delay(void) { int i, j; for (i = 0; i < 10000; i++) for (j = 0; j < 10000; j++) ll += *p; } There are two ways to fix this issue. One is to reduce the above one million threshold to say 100,000 and hopefully rcu warning will not show up any more. Another is to introduce a target feature which enables bpf_seq_read() calling cond_resched(). This patch took second approach as the first approach may cause more -EAGAIN failures for read() syscalls. Note that not all bpf_iter targets can permit cond_resched() in bpf_seq_read() as some, e.g., netlink seq iterator, rcu read lock critical section spans through seq_ops->next() -> seq_ops->show() -> seq_ops->next(). For the kernel code with the above hack, "bpftool p" roughly takes 38 seconds to finish on my VM with 184 bpf program runs. Using the following command, I am able to collect the number of context switches: perf stat -e context-switches -- ./bpftool p >& log Without this patch, 69 context-switches With this patch, 75 context-switches This patch added additional 6 context switches, roughly every 6 seconds to reschedule, to avoid lengthy no-rescheduling which may cause the above RCU warnings. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201028061054.1411116-1-yhs@fb.com --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b16bf48aab6..2fffd30e13ac 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1294,6 +1294,10 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +enum bpf_iter_feature { + BPF_ITER_RESCHED = BIT(0), +}; + #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; @@ -1302,6 +1306,7 @@ struct bpf_iter_reg { bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; u32 ctx_arg_info_size; + u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; -- cgit v1.2.3 From 5c251e9dc0e127bac6fc5b8e6696363d2e35f515 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:27 -0600 Subject: signal: Add task_sigpending() helper This is in preparation for maintaining signal_pending() as the decider of whether or not a schedule() loop should be broken, or continue sleeping. This is different than the core signal use cases, which really need to know whether an actual signal is pending or not. task_sigpending() returns non-zero if TIF_SIGPENDING is set. Only core kernel use cases should care about the distinction between the two, make sure those use the task_sigpending() helper. Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-2-axboe@kernel.dk --- include/linux/sched/signal.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..404145dc536e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -353,11 +353,16 @@ static inline int restart_syscall(void) return -ERESTARTNOINTR; } -static inline int signal_pending(struct task_struct *p) +static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } +static inline int signal_pending(struct task_struct *p) +{ + return task_sigpending(p); +} + static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); @@ -365,7 +370,7 @@ static inline int __fatal_signal_pending(struct task_struct *p) static inline int fatal_signal_pending(struct task_struct *p) { - return signal_pending(p) && __fatal_signal_pending(p); + return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) -- cgit v1.2.3 From 12db8b690010ccfadf9d0b49a1e1798e47dbbe1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:28 -0600 Subject: entry: Add support for TIF_NOTIFY_SIGNAL Add TIF_NOTIFY_SIGNAL handling in the generic entry code, which if set, will return true if signal_pending() is used in a wait loop. That causes an exit of the loop so that notify_signal tracehooks can be run. If the wait loop is currently inside a system call, the system call is restarted once task_work has been processed. In preparation for only having arch_do_signal() handle syscall restarts if _TIF_SIGPENDING isn't set, rename it to arch_do_signal_or_restart(). Pass in a boolean that tells the architecture specific signal handler if it should attempt to get a signal, or just process a potential syscall restart. For !CONFIG_GENERIC_ENTRY archs, add the TIF_NOTIFY_SIGNAL handling to get_signal(). This is done to minimize the needed architecture changes to support this feature. Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-3-axboe@kernel.dk --- include/linux/entry-common.h | 11 ++++++++--- include/linux/entry-kvm.h | 4 ++-- include/linux/sched/signal.h | 11 ++++++++++- include/linux/tracehook.h | 27 +++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..c7bfac45f951 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,6 +37,10 @@ # define _TIF_UPROBE (0) #endif +#ifndef _TIF_NOTIFY_SIGNAL +# define _TIF_NOTIFY_SIGNAL (0) +#endif + /* * TIF flags handled in syscall_enter_from_usermode() */ @@ -69,7 +73,7 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ + _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** @@ -226,12 +230,13 @@ static __always_inline void arch_exit_to_user_mode(void) { } #endif /** - * arch_do_signal - Architecture specific signal delivery function + * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs + * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal(struct pt_regs *regs); +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); /** * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0cef17afb41a..9b93f8584ff7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -11,8 +11,8 @@ # define ARCH_XFER_TO_GUEST_MODE_WORK (0) #endif -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 404145dc536e..bd5afa076189 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -360,6 +360,15 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { +#if defined(TIF_NOTIFY_SIGNAL) + /* + * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same + * behavior in terms of ensuring that we break out of wait loops + * so that notify signal callbacks can be processed. + */ + if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) + return 1; +#endif return task_sigpending(p); } @@ -507,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 36fb3bbed6b2..1e8caca92e1f 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -198,4 +198,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) blkcg_maybe_throttle_current(); } +/* + * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This + * is currently used by TWA_SIGNAL based task_work, which requires breaking + * wait loops to ensure that task_work is noticed and run. + */ +static inline void tracehook_notify_signal(void) +{ +#if defined(TIF_NOTIFY_SIGNAL) + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); + if (current->task_works) + task_work_run(); +#endif +} + +/* + * Called when we have work to process from exit_to_user_mode_loop() + */ +static inline void set_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && + !wake_up_state(task, TASK_INTERRUPTIBLE)) + kick_process(task); +#endif +} + #endif /* */ -- cgit v1.2.3 From 5bc78502322a5e4eef3f1b2a2813751dc6434143 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 20 Oct 2020 09:47:13 -0400 Subject: sched: fix exit_mm vs membarrier (v4) exit_mm should issue memory barriers after user-space memory accesses, before clearing current->mm, to order user-space memory accesses performed prior to exit_mm before clearing tsk->mm, which has the effect of skipping the membarrier private expedited IPIs. exit_mm should also update the runqueue's membarrier_state so membarrier global expedited IPIs are not sent when they are not needed. The membarrier system call can be issued concurrently with do_exit if we have thread groups created with CLONE_VM but not CLONE_THREAD. Here is the scenario I have in mind: Two thread groups are created, A and B. Thread group B is created by issuing clone from group A with flag CLONE_VM set, but not CLONE_THREAD. Let's assume we have a single thread within each thread group (Thread A and Thread B). The AFAIU we can have: Userspace variables: int x = 0, y = 0; CPU 0 CPU 1 Thread A Thread B (in thread group A) (in thread group B) x = 1 barrier() y = 1 exit() exit_mm() current->mm = NULL; r1 = load y membarrier() skips CPU 0 (no IPI) because its current mm is NULL r2 = load x BUG_ON(r1 == 1 && r2 == 0) Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201020134715.13909-2-mathieu.desnoyers@efficios.com --- include/linux/sched/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d5ece7a9a403..a91fb3ad9ec7 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -347,6 +347,8 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) extern void membarrier_exec_mmap(struct mm_struct *mm); +extern void membarrier_update_current_mm(struct mm_struct *next_mm); + #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, @@ -361,6 +363,9 @@ static inline void membarrier_exec_mmap(struct mm_struct *mm) static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } +static inline void membarrier_update_current_mm(struct mm_struct *next_mm) +{ +} #endif #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 8d97e71811aaafe4abf611dc24822fd6e73df1a1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Oct 2020 06:57:46 -0700 Subject: perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE Current perf can report both virtual addresses and physical addresses, but not the MMU page size. Without the MMU page size information of the utilized page, users cannot decide whether to promote/demote large pages to optimize memory usage. Add a new sample type for the data MMU page size. Current perf already has a facility to collect data virtual addresses. A page walker is required to walk the pages tables and calculate the MMU page size from a given virtual address. On some platforms, e.g., X86, the page walker is invoked in an NMI handler. So the page walker must be NMI-safe and low overhead. Besides, the page walker should work for both user and kernel virtual address. The existing generic page walker, e.g., walk_page_range_novma(), is a little bit complex and doesn't guarantee the NMI-safe. The follow_page() is only for user-virtual address. Add a new function perf_get_page_size() to walk the page tables and calculate the MMU page size. In the function: - Interrupts have to be disabled to prevent any teardown of the page tables. - For user space threads, the current->mm is used for the page walker. For kernel threads and the like, the current->mm is NULL. The init_mm is used for the page walker. The active_mm is not used here, because it can be NULL. Quote from Peter Zijlstra, "context_switch() can set prev->active_mm to NULL when it transfers it to @next. It does this before @current is updated. So an NMI that comes in between this active_mm swizzling and updating @current will see !active_mm." - The MMU page size is calculated from the page table level. The method should work for all architectures, but it has only been verified on X86. Should there be some architectures, which support perf, where the method doesn't work, it can be fixed later separately. Reporting the wrong page size would not be fatal for the architecture. Some under discussion features may impact the method in the future. Quote from Dave Hansen, "There are lots of weird things folks are trying to do with the page tables, like Address Space Isolation. For instance, if you get a perf NMI when running userspace, current->mm->pgd is *different* than the PGD that was in use when userspace was running. It's close enough today, but it might not stay that way." If the case happens later, lots of consecutive page walk errors will happen. The worst case is that lots of page-size '0' are returned, which would not be fatal. In the perf tool, a check is implemented to detect this case. Once it happens, a kernel patch could be implemented accordingly then. Suggested-by: Peter Zijlstra Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..7e3785dd27d9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1034,6 +1034,7 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; + u64 data_page_size; } ____cacheline_aligned; /* default value for data source */ -- cgit v1.2.3 From 995f088efebe1eba0282a6ffa12411b37f8990c2 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 1 Oct 2020 06:57:49 -0700 Subject: perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE When studying code layout, it is useful to capture the page size of the sampled code address. Add a new sample type for code page size. The new sample type requires collecting the ip. The code page size can be calculated from the NMI-safe perf_get_page_size(). For large PEBS, it's very unlikely that the mapping is gone for the earlier PEBS records. Enable the feature for the large PEBS. The worst case is that page-size '0' is returned. Signed-off-by: Kan Liang Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-5-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7e3785dd27d9..e533b03af053 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1035,6 +1035,7 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; u64 data_page_size; + u64 code_page_size; } ____cacheline_aligned; /* default value for data source */ -- cgit v1.2.3 From 51b646b2d9f84d6ff6300e3c1d09f2be4329a424 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2020 11:09:27 +0200 Subject: perf,mm: Handle non-page-table-aligned hugetlbfs A limited nunmber of architectures support hugetlbfs sizes that do not align with the page-tables (ARM64, Power, Sparc64). Add support for this to the generic perf_get_page_size() implementation, and also allow an architecture to override this implementation. This latter is only needed when it uses non-page-table aligned huge pages in its kernel map. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e533b03af053..0defb526cd0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1590,4 +1590,8 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now); +#ifdef CONFIG_MMU +extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); +#endif + #endif /* _LINUX_PERF_EVENT_H */ -- cgit v1.2.3 From c6838eeef2fbc7e3e1f83759aa016ae6b70c643e Mon Sep 17 00:00:00 2001 From: "dmitry.torokhov@gmail.com" Date: Wed, 30 Sep 2020 15:47:13 -0700 Subject: HID: hid-input: occasionally report stylus battery even if not changed There are styluses that only report their battery status when they are touching the touchscreen; additionally we currently suppress battery reports if capacity has not changed. To help userspace recognize how long ago the device reported battery status, let's send the change event through if either capacity has changed, or at least 30 seconds have passed since last report we've let through. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 58684657960b..b079146850e6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -585,6 +585,7 @@ struct hid_device { /* device report descriptor */ __s32 battery_report_id; enum hid_battery_status battery_status; bool battery_avoid_query; + ktime_t battery_ratelimit_time; #endif unsigned long status; /* see STAT flags above */ -- cgit v1.2.3 From f54ec58fee837ec847cb8b50593e81bfaa46107f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Oct 2020 21:12:12 +0100 Subject: wimax: move out to staging There are no known users of this driver as of October 2020, and it will be removed unless someone turns out to still need it in future releases. According to https://en.wikipedia.org/wiki/List_of_WiMAX_networks, there have been many public wimax networks, but it appears that many of these have migrated to LTE or discontinued their service altogether. As most PCs and phones lack WiMAX hardware support, the remaining networks tend to use standalone routers. These almost certainly run Linux, but not a modern kernel or the mainline wimax driver stack. NetworkManager appears to have dropped userspace support in 2015 https://bugzilla.gnome.org/show_bug.cgi?id=747846, the www.linuxwimax.org site had already shut down earlier. WiMax is apparently still being deployed on airport campus networks ("AeroMACS"), but in a frequency band that was not supported by the old Intel 2400m (used in Sandy Bridge laptops and earlier), which is the only driver using the kernel's wimax stack. Move all files into drivers/staging/wimax, including the uapi header files and documentation, to make it easier to remove it when it gets to that. Only minimal changes are made to the source files, in order to make it possible to port patches across the move. Also remove the MAINTAINERS entry that refers to a broken mailing list and website. Acked-by: Jakub Kicinski Acked-by: Greg Kroah-Hartman Acked-By: Inaky Perez-Gonzalez Acked-by: Johannes Berg Suggested-by: Inaky Perez-Gonzalez Signed-off-by: Arnd Bergmann --- include/linux/wimax/debug.h | 491 -------------------------------------------- 1 file changed, 491 deletions(-) delete mode 100644 include/linux/wimax/debug.h (limited to 'include/linux') diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h deleted file mode 100644 index cdae052bcdcd..000000000000 --- a/include/linux/wimax/debug.h +++ /dev/null @@ -1,491 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Linux WiMAX - * Collection of tools to manage debug operations. - * - * Copyright (C) 2005-2007 Intel Corporation - * Inaky Perez-Gonzalez - * - * Don't #include this file directly, read on! - * - * EXECUTING DEBUGGING ACTIONS OR NOT - * - * The main thing this framework provides is decission power to take a - * debug action (like printing a message) if the current debug level - * allows it. - * - * The decission power is at two levels: at compile-time (what does - * not make it is compiled out) and at run-time. The run-time - * selection is done per-submodule (as they are declared by the user - * of the framework). - * - * A call to d_test(L) (L being the target debug level) returns true - * if the action should be taken because the current debug levels - * allow it (both compile and run time). - * - * It follows that a call to d_test() that can be determined to be - * always false at compile time will get the code depending on it - * compiled out by optimization. - * - * DEBUG LEVELS - * - * It is up to the caller to define how much a debugging level is. - * - * Convention sets 0 as "no debug" (so an action marked as debug level 0 - * will always be taken). The increasing debug levels are used for - * increased verbosity. - * - * USAGE - * - * Group the code in modules and submodules inside each module [which - * in most cases maps to Linux modules and .c files that compose - * those]. - * - * For each module, there is: - * - * - a MODULENAME (single word, legal C identifier) - * - * - a debug-levels.h header file that declares the list of - * submodules and that is included by all .c files that use - * the debugging tools. The file name can be anything. - * - * - some (optional) .c code to manipulate the runtime debug levels - * through debugfs. - * - * The debug-levels.h file would look like: - * - * #ifndef __debug_levels__h__ - * #define __debug_levels__h__ - * - * #define D_MODULENAME modulename - * #define D_MASTER 10 - * - * #include - * - * enum d_module { - * D_SUBMODULE_DECLARE(submodule_1), - * D_SUBMODULE_DECLARE(submodule_2), - * ... - * D_SUBMODULE_DECLARE(submodule_N) - * }; - * - * #endif - * - * D_MASTER is the maximum compile-time debug level; any debug actions - * above this will be out. D_MODULENAME is the module name (legal C - * identifier), which has to be unique for each module (to avoid - * namespace collisions during linkage). Note those #defines need to - * be done before #including debug.h - * - * We declare N different submodules whose debug level can be - * independently controlled during runtime. - * - * In a .c file of the module (and only in one of them), define the - * following code: - * - * struct d_level D_LEVEL[] = { - * D_SUBMODULE_DEFINE(submodule_1), - * D_SUBMODULE_DEFINE(submodule_2), - * ... - * D_SUBMODULE_DEFINE(submodule_N), - * }; - * size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); - * - * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used - * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros - * #defined also in this file. - * - * To manipulate from user space the levels, create a debugfs dentry - * and then register each submodule with: - * - * d_level_register_debugfs("PREFIX_", submodule_X, parent); - * - * Where PREFIX_ is a name of your chosing. This will create debugfs - * file with a single numeric value that can be use to tweak it. To - * remove the entires, just use debugfs_remove_recursive() on 'parent'. - * - * NOTE: remember that even if this will show attached to some - * particular instance of a device, the settings are *global*. - * - * On each submodule (for example, .c files), the debug infrastructure - * should be included like this: - * - * #define D_SUBMODULE submodule_x // matches one in debug-levels.h - * #include "debug-levels.h" - * - * after #including all your include files. - * - * Now you can use the d_*() macros below [d_test(), d_fnstart(), - * d_fnend(), d_printf(), d_dump()]. - * - * If their debug level is greater than D_MASTER, they will be - * compiled out. - * - * If their debug level is lower or equal than D_MASTER but greater - * than the current debug level of their submodule, they'll be - * ignored. - * - * Otherwise, the action will be performed. - */ -#ifndef __debug__h__ -#define __debug__h__ - -#include -#include - -struct device; - -/* Backend stuff */ - -/* - * Debug backend: generate a message header from a 'struct device' - * - * @head: buffer where to place the header - * @head_size: length of @head - * @dev: pointer to device used to generate a header from. If NULL, - * an empty ("") header is generated. - */ -static inline -void __d_head(char *head, size_t head_size, - struct device *dev) -{ - if (dev == NULL) - head[0] = 0; - else if ((unsigned long)dev < 4096) { - printk(KERN_ERR "E: Corrupt dev %p\n", dev); - WARN_ON(1); - } else - snprintf(head, head_size, "%s %s: ", - dev_driver_string(dev), dev_name(dev)); -} - - -/* - * Debug backend: log some message if debugging is enabled - * - * @l: intended debug level - * @tag: tag to prefix the message with - * @dev: 'struct device' associated to this message - * @f: printf-like format and arguments - * - * Note this is optimized out if it doesn't pass the compile-time - * check; however, it is *always* compiled. This is useful to make - * sure the printf-like formats and variables are always checked and - * they don't get bit rot if you have all the debugging disabled. - */ -#define _d_printf(l, tag, dev, f, a...) \ -do { \ - char head[64]; \ - if (!d_test(l)) \ - break; \ - __d_head(head, sizeof(head), dev); \ - printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a); \ -} while (0) - - -/* - * CPP syntactic sugar to generate A_B like symbol names when one of - * the arguments is a preprocessor #define. - */ -#define __D_PASTE__(varname, modulename) varname##_##modulename -#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename)) -#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name)) - - -/* - * Store a submodule's runtime debug level and name - */ -struct d_level { - u8 level; - const char *name; -}; - - -/* - * List of available submodules and their debug levels - * - * We call them d_level_MODULENAME and d_level_size_MODULENAME; the - * macros D_LEVEL and D_LEVEL_SIZE contain the name already for - * convenience. - * - * This array and the size are defined on some .c file that is part of - * the current module. - */ -#define D_LEVEL __D_PASTE(d_level, D_MODULENAME) -#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME) - -extern struct d_level D_LEVEL[]; -extern size_t D_LEVEL_SIZE; - - -/* - * Frontend stuff - * - * - * Stuff you need to declare prior to using the actual "debug" actions - * (defined below). - */ - -#ifndef D_MODULENAME -#error D_MODULENAME is not defined in your debug-levels.h file -/** - * D_MODULE - Name of the current module - * - * #define in your module's debug-levels.h, making sure it is - * unique. This has to be a legal C identifier. - */ -#define D_MODULENAME undefined_modulename -#endif - - -#ifndef D_MASTER -#warning D_MASTER not defined, but debug.h included! [see docs] -/** - * D_MASTER - Compile time maximum debug level - * - * #define in your debug-levels.h file to the maximum debug level the - * runtime code will be allowed to have. This allows you to provide a - * main knob. - * - * Anything above that level will be optimized out of the compile. - * - * Defaults to zero (no debug code compiled in). - * - * Maximum one definition per module (at the debug-levels.h file). - */ -#define D_MASTER 0 -#endif - -#ifndef D_SUBMODULE -#error D_SUBMODULE not defined, but debug.h included! [see docs] -/** - * D_SUBMODULE - Name of the current submodule - * - * #define in your submodule .c file before #including debug-levels.h - * to the name of the current submodule as previously declared and - * defined with D_SUBMODULE_DECLARE() (in your module's - * debug-levels.h) and D_SUBMODULE_DEFINE(). - * - * This is used to provide runtime-control over the debug levels. - * - * Maximum one per .c file! Can be shared among different .c files - * (meaning they belong to the same submodule categorization). - */ -#define D_SUBMODULE undefined_module -#endif - - -/** - * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control - * - * @_name: name of the submodule, restricted to the chars that make up a - * valid C identifier ([a-zA-Z0-9_]). - * - * Declare in the module's debug-levels.h header file as: - * - * enum d_module { - * D_SUBMODULE_DECLARE(submodule_1), - * D_SUBMODULE_DECLARE(submodule_2), - * D_SUBMODULE_DECLARE(submodule_3), - * }; - * - * Some corresponding .c file needs to have a matching - * D_SUBMODULE_DEFINE(). - */ -#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name - - -/** - * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control - * - * @_name: name of the submodule, restricted to the chars that make up a - * valid C identifier ([a-zA-Z0-9_]). - * - * Use once per module (in some .c file) as: - * - * static - * struct d_level d_level_SUBMODULENAME[] = { - * D_SUBMODULE_DEFINE(submodule_1), - * D_SUBMODULE_DEFINE(submodule_2), - * D_SUBMODULE_DEFINE(submodule_3), - * }; - * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME); - * - * Matching D_SUBMODULE_DECLARE()s have to be present in a - * debug-levels.h header file. - */ -#define D_SUBMODULE_DEFINE(_name) \ -[__D_SUBMODULE_##_name] = { \ - .level = 0, \ - .name = #_name \ -} - - - -/* The actual "debug" operations */ - - -/** - * d_test - Returns true if debugging should be enabled - * - * @l: intended debug level (unsigned) - * - * If the master debug switch is enabled and the current settings are - * higher or equal to the requested level, then debugging - * output/actions should be enabled. - * - * NOTE: - * - * This needs to be coded so that it can be evaluated in compile - * time; this is why the ugly BUG_ON() is placed in there, so the - * D_MASTER evaluation compiles all out if it is compile-time false. - */ -#define d_test(l) \ -({ \ - unsigned __l = l; /* type enforcer */ \ - (D_MASTER) >= __l \ - && ({ \ - BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\ - D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l; \ - }); \ -}) - - -/** - * d_fnstart - log message at function start if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a) - - -/** - * d_fnend - log message at function end if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a) - - -/** - * d_printf - log message if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a) - - -/** - * d_dump - log buffer hex dump if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_dump(l, dev, ptr, size) \ -do { \ - char head[64]; \ - if (!d_test(l)) \ - break; \ - __d_head(head, sizeof(head), dev); \ - print_hex_dump(KERN_ERR, head, 0, 16, 1, \ - ((void *) ptr), (size), 0); \ -} while (0) - - -/** - * Export a submodule's debug level over debugfs as PREFIXSUBMODULE - * - * @prefix: string to prefix the name with - * @submodule: name of submodule (not a string, just the name) - * @dentry: debugfs parent dentry - * - * For removing, just use debugfs_remove_recursive() on the parent. - */ -#define d_level_register_debugfs(prefix, name, parent) \ -({ \ - debugfs_create_u8( \ - prefix #name, 0600, parent, \ - &(D_LEVEL[__D_SUBMODULE_ ## name].level)); \ -}) - - -static inline -void d_submodule_set(struct d_level *d_level, size_t d_level_size, - const char *submodule, u8 level, const char *tag) -{ - struct d_level *itr, *top; - int index = -1; - - for (itr = d_level, top = itr + d_level_size; itr < top; itr++) { - index++; - if (itr->name == NULL) { - printk(KERN_ERR "%s: itr->name NULL?? (%p, #%d)\n", - tag, itr, index); - continue; - } - if (!strcmp(itr->name, submodule)) { - itr->level = level; - return; - } - } - printk(KERN_ERR "%s: unknown submodule %s\n", tag, submodule); -} - - -/** - * d_parse_params - Parse a string with debug parameters from the - * command line - * - * @d_level: level structure (D_LEVEL) - * @d_level_size: number of items in the level structure - * (D_LEVEL_SIZE). - * @_params: string with the parameters; this is a space (not tab!) - * separated list of NAME:VALUE, where value is the debug level - * and NAME is the name of the submodule. - * @tag: string for error messages (example: MODULE.ARGNAME). - */ -static inline -void d_parse_params(struct d_level *d_level, size_t d_level_size, - const char *_params, const char *tag) -{ - char submodule[130], *params, *params_orig, *token, *colon; - unsigned level, tokens; - - if (_params == NULL) - return; - params_orig = kstrdup(_params, GFP_KERNEL); - params = params_orig; - while (1) { - token = strsep(¶ms, " "); - if (token == NULL) - break; - if (*token == '\0') /* eat joint spaces */ - continue; - /* kernel's sscanf %s eats until whitespace, so we - * replace : by \n so it doesn't get eaten later by - * strsep */ - colon = strchr(token, ':'); - if (colon != NULL) - *colon = '\n'; - tokens = sscanf(token, "%s\n%u", submodule, &level); - if (colon != NULL) - *colon = ':'; /* set back, for error messages */ - if (tokens == 2) - d_submodule_set(d_level, d_level_size, - submodule, level, tag); - else - printk(KERN_ERR "%s: can't parse '%s' as a " - "SUBMODULE:LEVEL (%d tokens)\n", - tag, token, tokens); - } - kfree(params_orig); -} - -#endif /* #ifndef __debug__h__ */ -- cgit v1.2.3 From 080b6f40763565f65ebb9540219c71ce885cf568 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 18:15:05 +0100 Subject: bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a function scope __attribute__((optimize("-fno-gcse"))), to disable a GCC specific optimization that was causing trouble on x86 builds, and was not expected to have any positive effect in the first place. However, as the GCC manual documents, __attribute__((optimize)) is not for production use, and results in all other optimization options to be forgotten for the function in question. This can cause all kinds of trouble, but in one particular reported case, it causes -fno-asynchronous-unwind-tables to be disregarded, resulting in .eh_frame info to be emitted for the function. This reverts commit 3193c0836, and instead, it disables the -fgcse optimization for the entire source file, but only when building for X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the original commit states that CONFIG_RETPOLINE=n triggers the issue, whereas CONFIG_RETPOLINE=y performs better without the optimization, so it is kept disabled in both cases. Fixes: 3193c0836f20 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") Signed-off-by: Ard Biesheuvel Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20201028171506.15682-2-ardb@kernel.org --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler_types.h | 4 ---- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d1e3c6896b71..5deb37024574 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -175,5 +175,3 @@ #else #define __diag_GCC_8(s) #endif - -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..ac3fa37a84f9 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -247,10 +247,6 @@ struct ftrace_likely_data { #define asm_inline asm #endif -#ifndef __no_fgcse -# define __no_fgcse -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit v1.2.3 From 46d6c5ae953cc0be38efd0e469284df7c4328cf8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:06 +0100 Subject: netfilter: use actual socket sk rather than skb sk when routing harder If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit(): /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing. One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes. So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- include/linux/netfilter_ipv6.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 082e2c41b7ff..5b70ca868bb1 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; }; -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type); struct nf_queue_entry; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 9b67394471e1..48314ade1506 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -42,7 +42,7 @@ struct nf_ipv6_ops { #if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); @@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, #endif } -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); -static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); @@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) if (!v6_ops) return -EHOSTUNREACH; - return v6_ops->route_me_harder(net, skb); + return v6_ops->route_me_harder(net, sk, skb); #elif IS_BUILTIN(CONFIG_IPV6) - return ip6_route_me_harder(net, skb); + return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; #endif -- cgit v1.2.3 From c0391b6ab810381df632677a1dcbbbbd63d05b6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 29 Oct 2020 13:50:03 +0100 Subject: netfilter: nf_tables: missing validation from the abort path If userspace does not include the trailing end of batch message, then nfnetlink aborts the transaction. This allows to check that ruleset updates trigger no errors. After this patch, invoking this command from the prerouting chain: # nft -c add rule x y fib saddr . oif type local fails since oif is not supported there. This patch fixes the lack of rule validation from the abort/check path to catch configuration errors such as the one above. Fixes: a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation") Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 89016d08f6a2..f6267e2883f2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -24,6 +24,12 @@ struct nfnl_callback { const u_int16_t attr_count; /* number of nlattr's */ }; +enum nfnl_abort_action { + NFNL_ABORT_NONE = 0, + NFNL_ABORT_AUTOLOAD, + NFNL_ABORT_VALIDATE, +}; + struct nfnetlink_subsystem { const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ @@ -31,7 +37,8 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); - int (*abort)(struct net *net, struct sk_buff *skb, bool autoload); + int (*abort)(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action); void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; -- cgit v1.2.3 From 77f6c0b87479c4578ac0798fc249637092ac45a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Sep 2020 12:30:50 +0200 Subject: timekeeping: remove arch_gettimeoffset With Arm EBSA110 gone, nothing uses it any more, so the corresponding code and the Kconfig option can be removed. Acked-by: Thomas Gleixner Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/time.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index b142cb5f5a53..16cf4522d6f3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -21,19 +21,6 @@ extern time64_t mktime64(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); -/* Some architectures do not supply their own clocksource. - * This is mainly the case in architectures that get their - * inter-tick times by reading the counter on their interval - * timer. Since these timers wrap every tick, they're not really - * useful as clocksources. Wrapping them to act like one is possible - * but not very efficient. So we provide a callout these arches - * can implement for use with the jiffies clocksource to provide - * finer then tick granular time. - */ -#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET -extern u32 (*arch_gettimeoffset)(void); -#endif - #ifdef CONFIG_POSIX_TIMERS extern void clear_itimer(void); #else -- cgit v1.2.3 From b3550164a19d62e515af6cacb5a31f0b2b3f9501 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Sep 2020 15:21:43 +0200 Subject: timekeeping: add CONFIG_LEGACY_TIMER_TICK All platforms that currently do not use generic clockevents roughly call the same set of functions in their timer interrupts: xtime_update(), update_process_times() and profile_tick(), sometimes in a different sequence. Add a helper function that performs all three of them, to make the callers more uniform and simplify the interface. Reviewed-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7f7e4a3f4394..3670cb1670ff 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -12,6 +12,7 @@ extern int timekeeping_suspended; /* Architecture timer tick functions: */ extern void update_process_times(int user); extern void xtime_update(unsigned long ticks); +extern void legacy_timer_tick(unsigned long ticks); /* * Get and set timeofday -- cgit v1.2.3 From 56cc7b8acfb7c763f71c0492fa8da01dca7c1760 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Sep 2020 17:39:11 +0200 Subject: timekeeping: remove xtime_update There are no more users of xtime_update aside from legacy_timer_tick(), so fold it into that function and remove the declaration. update_process_times() is now only called inside of the kernel/time/ code, so the declaration can be moved there. Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/timekeeping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3670cb1670ff..d47009611109 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -10,8 +10,6 @@ void timekeeping_init(void); extern int timekeeping_suspended; /* Architecture timer tick functions: */ -extern void update_process_times(int user); -extern void xtime_update(unsigned long ticks); extern void legacy_timer_tick(unsigned long ticks); /* -- cgit v1.2.3 From e38d86b354f96e3ed6f5e6c7dbb442d7107fc0bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:08 +0800 Subject: sctp: add the error cause for new encapsulation port restart This patch is to add the function to make the abort chunk with the error cause for new encapsulation port restart, defined on Section 4.4 in draft-tuexen-tsvwg-sctp-udp-encaps-cons-03. v1->v2: - no change. v2->v3: - no need to call htons() when setting nep.cur_port/new_port. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/linux/sctp.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 76731230bbc5..bb1926589693 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -482,11 +482,13 @@ enum sctp_error { * 11 Restart of an association with new addresses * 12 User Initiated Abort * 13 Protocol Violation + * 14 Restart of an Association with New Encapsulation Port */ SCTP_ERROR_RESTART = cpu_to_be16(0x0b), SCTP_ERROR_USER_ABORT = cpu_to_be16(0x0c), SCTP_ERROR_PROTO_VIOLATION = cpu_to_be16(0x0d), + SCTP_ERROR_NEW_ENCAP_PORT = cpu_to_be16(0x0e), /* ADDIP Section 3.3 New Error Causes * @@ -793,4 +795,22 @@ enum { SCTP_FLOWLABEL_VAL_MASK = 0xfffff }; +/* UDP Encapsulation + * draft-tuexen-tsvwg-sctp-udp-encaps-cons-03.html#section-4-4 + * + * The error cause indicating an "Restart of an Association with + * New Encapsulation Port" + * + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cause Code = 14 | Cause Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Current Encapsulation Port | New Encapsulation Port | + * +-------------------------------+-------------------------------+ + */ +struct sctp_new_encap_port_hdr { + __be16 cur_port; + __be16 new_port; +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From 1887023a5e96f98249574e0785b7aa2e5742ca68 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Wed, 28 Oct 2020 11:15:40 -0600 Subject: net: phy: marvell: add special handling of Finisar modules with 88E1111 The Finisar FCLF8520P2BTL 1000BaseT SFP module uses a Marvel 88E1111 PHY with a modified PHY ID. Add support for this ID using the 88E1111 methods. By default these modules do not have 1000BaseX auto-negotiation enabled, which is not generally desirable with Linux networking drivers. Add handling to enable 1000BaseX auto-negotiation when these modules are used in 1000BaseX mode. Also, some special handling is required to ensure that 1000BaseT auto-negotiation is enabled properly when desired. Based on existing handling in the AMD xgbe driver and the information in the Finisar FAQ: https://www.finisar.com/sites/default/files/resources/an-2036_1000base-t_sfp_faqreve1.pdf Signed-off-by: Robert Hancock Reviewed-by: Russell King Link: https://lore.kernel.org/r/20201028171540.1700032-1-robert.hancock@calian.com Signed-off-by: Jakub Kicinski --- include/linux/marvell_phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index ff7b7607c8cf..52b1610eae68 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -25,6 +25,9 @@ #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 +/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ +#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 + /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do * not have a model ID. So the switch driver traps reads to the ID2 * register and returns the switch family ID -- cgit v1.2.3 From 955062b03fa62b802a1ee34fbb04e39f7a70ae73 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 29 Oct 2020 01:38:31 +0200 Subject: net: bridge: mcast: add support for raw L2 multicast groups Extend the bridge multicast control and data path to configure routes for L2 (non-IP) multicast groups. The uapi struct br_mdb_entry union u is extended with another variant, mac_addr, which does not change the structure size, and which is valid when the proto field is zero. To be compatible with the forwarding code that is already in place, which acts as an IGMP/MLD snooping bridge with querier capabilities, we need to declare that for L2 MDB entries (for which there exists no such thing as IGMP/MLD snooping/querying), that there is always a querier. Otherwise, these entries would be flooded to all bridge ports and not just to those that are members of the L2 multicast group. Needless to say, only permanent L2 multicast groups can be installed on a bridge port. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201028233831.610076-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 556caed00258..b979005ea39c 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -25,6 +25,7 @@ struct br_ip { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ip6; #endif + unsigned char mac_addr[ETH_ALEN]; } dst; __be16 proto; __u16 vid; -- cgit v1.2.3 From ccf0a4b7fc688561428290265e4effde41446668 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 29 Oct 2020 16:39:48 +0100 Subject: netfilter: ipset: Add bucketsize parameter to all hash types The parameter defines the upper limit in any hash bucket at adding new entries from userspace - if the limit would be exceeded, ipset doubles the hash size and rehashes. It means the set may consume more memory but gives faster evaluation at matching in the set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ab192720e2d6..46d9a0c26c67 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -198,6 +198,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* The max revision number supported by any set type + 1 */ +#define IPSET_REVISION_MAX 9 + /* The core set type structure */ struct ip_set_type { struct list_head list; @@ -215,6 +218,8 @@ struct ip_set_type { u8 family; /* Type revisions */ u8 revision_min, revision_max; + /* Revision-specific supported (create) flags */ + u8 create_flags[IPSET_REVISION_MAX+1]; /* Set features to control swapping */ u16 features; -- cgit v1.2.3 From 3ebc0ef06e4a78522e9d1488dcf61b7d8fcfb792 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 11 Sep 2020 16:33:42 +0200 Subject: serial: s3c: Update path of Samsung S3C machine file Correct the path to Samsung S3C24xx machine file, mentioned in documentation. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200911143343.498-2-krzk@kernel.org --- include/linux/serial_s3c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 463ed28d2b27..ca2c5393dc6b 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -254,7 +254,7 @@ * serial port * * the pointer is setup by the machine specific initialisation from the - * arch/arm/mach-s3c2410/ directory. + * arch/arm/mach-s3c/ directory. */ struct s3c2410_uartcfg { -- cgit v1.2.3 From 3a096c2bda7d2f0c0866d466447c41bc4b8ecdec Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:32 +0200 Subject: iio: fix a kernel-doc markup A function has a different name between their prototype and its kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/46622c3bdcffb76e79719f0fe5011c2952960b32.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index cad8325903f9..f930c4ccf378 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -97,7 +97,7 @@ static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig) } /** - * iio_device_set_drvdata() - Set trigger driver data + * iio_trigger_set_drvdata() - Set trigger driver data * @trig: IIO trigger structure * @data: Driver specific data * -- cgit v1.2.3 From d3fd65484c78102bac92f176c53537a7691a38b2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 29 Oct 2020 18:29:59 +0100 Subject: net: core: add dev_sw_netstats_tx_add Add dev_sw_netstats_tx_add(), complementing already existing dev_sw_netstats_rx_add(). Other than dev_sw_netstats_rx_add allow to pass the number of packets as function argument. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..568fab708ac6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2557,6 +2557,18 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l u64_stats_update_end(&tstats->syncp); } +static inline void dev_sw_netstats_tx_add(struct net_device *dev, + unsigned int packets, + unsigned int len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += len; + tstats->tx_packets += packets; + u64_stats_update_end(&tstats->syncp); +} + static inline void dev_lstats_add(struct net_device *dev, unsigned int len) { struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); -- cgit v1.2.3 From 81b01894d792db8d4746c9c1b39c6e2a94fa0a04 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 29 Oct 2020 18:31:21 +0100 Subject: net: core: add devm_netdev_alloc_pcpu_stats We have netdev_alloc_pcpu_stats(), and we have devm_alloc_percpu(). Add a managed version of netdev_alloc_pcpu_stats, e.g. for allocating the per-cpu stats in the probe() callback of a driver. It needs to be a macro for dealing properly with the type argument. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 568fab708ac6..a53ed2d1ed1d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2596,6 +2596,20 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL) +#define devm_netdev_alloc_pcpu_stats(dev, type) \ +({ \ + typeof(type) __percpu *pcpu_stats = devm_alloc_percpu(dev, type);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) + enum netdev_lag_tx_type { NETDEV_LAG_TX_TYPE_UNKNOWN, NETDEV_LAG_TX_TYPE_RANDOM, -- cgit v1.2.3 From 1c552e08b29895d31bbf82bdb549811cfde31db4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:02 -0700 Subject: firmware: ti_sci: rm: Add support for tx_tdtype parameter for tx channel The system controller's resource manager have support for configuring the TDTYPE of TCHAN_CFG register on j721e. With this parameter the teardown completion can be controlled: TDTYPE == 0: Return without waiting for peer to complete the teardown TDTYPE == 1: Wait for peer to complete the teardown Signed-off-by: Peter Ujfalusi Reviewed-by: Tero Kristo Tested-by: Keerthy Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index cf27b080e148..d254d99fd45b 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -345,6 +345,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID BIT(11) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_CREDIT_COUNT_VALID BIT(12) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FDEPTH_VALID BIT(13) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID BIT(15) u16 nav_id; u16 index; u8 tx_pause_on_err; @@ -362,6 +363,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { u16 fdepth; u8 tx_sched_priority; u8 tx_burst_size; + u8 tx_tdtype; }; /** -- cgit v1.2.3 From 967a020bd3deddf9a0af73aeb4d4b46d90030937 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:03 -0700 Subject: firmware: ti_sci: Use struct ti_sci_resource_desc in get_range ops Use the ti_sci_resource_desc directly and update it's start and num members directly instead of requiring individual parameters for them. This will allow easy extension of the RM parameters without changing API. Signed-off-by: Peter Ujfalusi Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index d254d99fd45b..6cd537db4d33 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -195,6 +195,18 @@ struct ti_sci_clk_ops { u64 *current_freq); }; +/** + * struct ti_sci_resource_desc - Description of TI SCI resource instance range. + * @start: Start index of the resource. + * @num: Number of resources. + * @res_map: Bitmap to manage the allocation of these resources. + */ +struct ti_sci_resource_desc { + u16 start; + u16 num; + unsigned long *res_map; +}; + /** * struct ti_sci_rm_core_ops - Resource management core operations * @get_range: Get a range of resources belonging to ti sci host. @@ -209,15 +221,15 @@ struct ti_sci_clk_ops { * - dev_id: TISCI device ID. * - subtype: Resource assignment subtype that is being requested * from the given device. - * - range_start: Start index of the resource range - * - range_end: Number of resources in the range + * - desc: Pointer to ti_sci_resource_desc to be updated with the resource + * range start index and number of resources */ struct ti_sci_rm_core_ops { int (*get_range)(const struct ti_sci_handle *handle, u32 dev_id, - u8 subtype, u16 *range_start, u16 *range_num); + u8 subtype, struct ti_sci_resource_desc *desc); int (*get_range_from_shost)(const struct ti_sci_handle *handle, u32 dev_id, u8 subtype, u8 s_host, - u16 *range_start, u16 *range_num); + struct ti_sci_resource_desc *desc); }; #define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT 0 @@ -522,18 +534,6 @@ struct ti_sci_handle { #define TI_SCI_RESOURCE_NULL 0xffff -/** - * struct ti_sci_resource_desc - Description of TI SCI resource instance range. - * @start: Start index of the resource. - * @num: Number of resources. - * @res_map: Bitmap to manage the allocation of these resources. - */ -struct ti_sci_resource_desc { - u16 start; - u16 num; - unsigned long *res_map; -}; - /** * struct ti_sci_resource - Structure representing a resource assigned * to a device. -- cgit v1.2.3 From 519c5c0c558b529f835c9bb30f9a1eb2034d585c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:03 -0700 Subject: firmware: ti_sci: rm: Add support for second resource range Sysfw added support for a second range in the resource range API to be able to describe complex allocations mainly for DMA channels. Update the ti_sci part to consider the second range as well. Signed-off-by: Peter Ujfalusi Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 6cd537db4d33..9699b260de59 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -197,13 +197,17 @@ struct ti_sci_clk_ops { /** * struct ti_sci_resource_desc - Description of TI SCI resource instance range. - * @start: Start index of the resource. - * @num: Number of resources. + * @start: Start index of the first resource range. + * @num: Number of resources in the first range. + * @start_sec: Start index of the second resource range. + * @num_sec: Number of resources in the second range. * @res_map: Bitmap to manage the allocation of these resources. */ struct ti_sci_resource_desc { u16 start; u16 num; + u16 start_sec; + u16 num_sec; unsigned long *res_map; }; -- cgit v1.2.3 From ce1feed58534d8489afb4900bee75dff15d950e0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:05 -0700 Subject: firmware: ti_sci: rm: Add support for extended_ch_type for tx channel Sysfw added 'extended_ch_type' to the tx_ch_cfg_req message which should be used when BCDMA block copy channels are configured: extended_ch_type = 0 : the channel is split tx channel (tchan) extended_ch_type = 1 : the channel is block copy channel (bchan) Signed-off-by: Peter Ujfalusi Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 9699b260de59..6978afc00823 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -336,6 +336,9 @@ struct ti_sci_rm_psil_ops { #define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES 2 #define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES 3 +#define TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_TCHAN 0 +#define TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_BCHAN 1 + /* UDMAP TX/RX channel valid_params common declarations */ #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID BIT(0) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID BIT(1) @@ -362,6 +365,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_CREDIT_COUNT_VALID BIT(12) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FDEPTH_VALID BIT(13) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID BIT(15) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_EXTENDED_CH_TYPE_VALID BIT(16) u16 nav_id; u16 index; u8 tx_pause_on_err; @@ -380,6 +384,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { u8 tx_sched_priority; u8 tx_burst_size; u8 tx_tdtype; + u8 extended_ch_type; }; /** -- cgit v1.2.3 From 4d8ddf673a420aa25668eceeb4fbf33e2521fdf2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:05 -0700 Subject: firmware: ti_sci: rm: Remove ring_get_config support The ring_get_cfg (0x1111 message) is not used and it is not supported by sysfw for a long time. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 6978afc00823..6710d7ac7a72 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -286,8 +286,6 @@ struct ti_sci_rm_irq_ops { /** * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations * @config: configure the SoC Navigator Subsystem Ring Accelerator ring - * @get_config: get the SoC Navigator Subsystem Ring Accelerator ring - * configuration */ struct ti_sci_rm_ringacc_ops { int (*config)(const struct ti_sci_handle *handle, @@ -295,10 +293,6 @@ struct ti_sci_rm_ringacc_ops { u32 addr_lo, u32 addr_hi, u32 count, u8 mode, u8 size, u8 order_id ); - int (*get_config)(const struct ti_sci_handle *handle, - u32 nav_id, u32 index, u8 *mode, - u32 *addr_lo, u32 *addr_hi, u32 *count, - u8 *size, u8 *order_id); }; /** -- cgit v1.2.3 From 3c2017536f3a122bf246cc87f9327e9ec138db92 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:06 -0700 Subject: firmware: ti_sci: rm: Add new ops for ring configuration The sysfw ring configuration message has been extended to include virtid and asel value for the ring. Add the ASEL_VALID to TI_SCI_MSG_VALUE_RM_ALL_NO_ORDER as it is required for DMA rings. Instead of extending the current .config() ops - which would need same patch change in the ringacc driver - add ti_sci_msg_rm_ring_cfg struct and a new ops using it to configure the ring. This will allow easy update path in case new members are added for the ring configuration. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 6710d7ac7a72..d1711050cd9d 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -275,17 +275,44 @@ struct ti_sci_rm_irq_ops { #define TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID BIT(4) /* RA config.order_id parameter is valid for RM ring configure TISCI message */ #define TI_SCI_MSG_VALUE_RM_RING_ORDER_ID_VALID BIT(5) +/* RA config.virtid parameter is valid for RM ring configure TISCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_VIRTID_VALID BIT(6) +/* RA config.asel parameter is valid for RM ring configure TISCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_ASEL_VALID BIT(7) #define TI_SCI_MSG_VALUE_RM_ALL_NO_ORDER \ (TI_SCI_MSG_VALUE_RM_RING_ADDR_LO_VALID | \ TI_SCI_MSG_VALUE_RM_RING_ADDR_HI_VALID | \ TI_SCI_MSG_VALUE_RM_RING_COUNT_VALID | \ TI_SCI_MSG_VALUE_RM_RING_MODE_VALID | \ - TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID) + TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID | \ + TI_SCI_MSG_VALUE_RM_RING_ASEL_VALID) + +/** + * struct ti_sci_msg_rm_ring_cfg - Ring configuration + * + * Parameters for Navigator Subsystem ring configuration + * See @ti_sci_msg_rm_ring_cfg_req + */ +struct ti_sci_msg_rm_ring_cfg { + u32 valid_params; + u16 nav_id; + u16 index; + u32 addr_lo; + u32 addr_hi; + u32 count; + u8 mode; + u8 size; + u8 order_id; + u16 virtid; + u8 asel; +}; /** * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations * @config: configure the SoC Navigator Subsystem Ring Accelerator ring + * Deprecated + * @set_cfg: configure the SoC Navigator Subsystem Ring Accelerator ring */ struct ti_sci_rm_ringacc_ops { int (*config)(const struct ti_sci_handle *handle, @@ -293,6 +320,8 @@ struct ti_sci_rm_ringacc_ops { u32 addr_lo, u32 addr_hi, u32 count, u8 mode, u8 size, u8 order_id ); + int (*set_cfg)(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_ring_cfg *params); }; /** -- cgit v1.2.3 From fed7552f1e69296461fca62ebaa0bb5a06fec0df Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:07 -0700 Subject: firmware: ti_sci: rm: Remove unused config() from ti_sci_rm_ringacc_ops The ringacc driver has been converted to use the new set_cfg function to configure the ring, the old config ops can be removed. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index d1711050cd9d..0aad7009b50e 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -310,16 +310,9 @@ struct ti_sci_msg_rm_ring_cfg { /** * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations - * @config: configure the SoC Navigator Subsystem Ring Accelerator ring - * Deprecated * @set_cfg: configure the SoC Navigator Subsystem Ring Accelerator ring */ struct ti_sci_rm_ringacc_ops { - int (*config)(const struct ti_sci_handle *handle, - u32 valid_params, u16 nav_id, u16 index, - u32 addr_lo, u32 addr_hi, u32 count, u8 mode, - u8 size, u8 order_id - ); int (*set_cfg)(const struct ti_sci_handle *handle, const struct ti_sci_msg_rm_ring_cfg *params); }; -- cgit v1.2.3 From 8c42379e40e2db4199ceeb6a6ef9fff73ff132cf Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:22 -0700 Subject: soc: ti: k3-ringacc: Use correct device for allocation in RING mode In RING mode the ringacc does not access the ring memory. In this access mode the ringacc coherency does not have meaning. If the ring is configured in RING mode, then the ringacc itself will not access to the ring memory. Only the requester (user) of the ring is going to read/write to the memory. Extend the ring configuration parameters with a device pointer to be used for DMA API when the ring is configured in RING mode. Extending the ring configuration struct will allow per ring selection of device to be used for allocation, thus allowing per ring coherency. To avoid regression, fall back to use the ringacc dev in case the alloc_dev is not provided. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/k3-ringacc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 5a472eca5ee4..658dc71d2901 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -67,6 +67,9 @@ struct k3_ring; * few times. It's usable when the same ring is used as Free Host PD ring * for different flows, for example. * Note: Locking should be done by consumer if required + * @dma_dev: Master device which is using and accessing to the ring + * memory when the mode is K3_RINGACC_RING_MODE_RING. Memory allocations + * should be done using this device. */ struct k3_ring_cfg { u32 size; @@ -74,6 +77,8 @@ struct k3_ring_cfg { enum k3_ring_mode mode; #define K3_RINGACC_RING_SHARED BIT(1) u32 flags; + + struct device *dma_dev; }; #define K3_RINGACC_RING_ID_ANY (-1) -- cgit v1.2.3 From f51778db088b2407ec177f2f4da0f6290602aa3f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 2 Nov 2020 12:43:27 +1100 Subject: swiotlb: using SIZE_MAX needs limits.h included After merging the drm-misc tree, linux-next build (arm multi_v7_defconfig) failed like this: In file included from drivers/gpu/drm/nouveau/nouveau_ttm.c:26: include/linux/swiotlb.h: In function 'swiotlb_max_mapping_size': include/linux/swiotlb.h:99:9: error: 'SIZE_MAX' undeclared (first use in this function) 99 | return SIZE_MAX; | ^~~~~~~~ include/linux/swiotlb.h:7:1: note: 'SIZE_MAX' is defined in header ''; did you forget to '#include '? 6 | #include +++ |+#include 7 | #include include/linux/swiotlb.h:99:9: note: each undeclared identifier is reported only once for each function it appears in 99 | return SIZE_MAX; | ^~~~~~~~ Caused by commit abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") but only exposed by commit "drm/nouveu: fix swiotlb include" Fix it by including linux/limits.h as appropriate. Fixes: abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") Signed-off-by: Stephen Rothwell Link: https://lore.kernel.org/r/20201102124327.2f82b2a7@canb.auug.org.au Signed-off-by: Michael S. Tsirkin --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 046bb94bd4d6..fa5122c6711e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -5,6 +5,7 @@ #include #include #include +#include struct device; struct page; -- cgit v1.2.3 From 7a60c2dd0f575ab14a457e99582af0ca1e072a74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 28 Oct 2020 16:15:26 -0300 Subject: drm: Remove SCATTERLIST_MAX_SEGMENT Since commit 9a40401cfa13 ("lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values") the max_segment input to sg_alloc_table_from_pages() does not have to be any special value. The new algorithm will always create something less than what the user provides. Thus eliminate this confusing constant. - vmwgfx should use the HW capability, not mix in the OS page size for calling dma_set_max_seg_size() - i915 uses i915_sg_segment_size() both for sg_alloc_table_from_pages and for some open coded sgl construction. This doesn't change the value since rounddown(size, UINT_MAX) == SCATTERLIST_MAX_SEGMENT - drm_prime_pages_to_sg uses it as a default if max_segment is zero, UINT_MAX is fine to use directly. Cc: Gerd Hoffmann Cc: Daniel Vetter Cc: Thomas Hellstrom Cc: Qian Cai Cc: "Ursulin, Tvrtko" Suggested-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/0-v1-44733fccd781+13d-rm_scatterlist_max_jgg@nvidia.com --- include/linux/scatterlist.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 36c47e7e66a2..6f70572b2938 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -18,12 +18,6 @@ struct scatterlist { #endif }; -/* - * Since the above length field is an unsigned int, below we define the maximum - * length in bytes that can be stored in one scatterlist entry. - */ -#define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK) - /* * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. -- cgit v1.2.3 From fc0021aa340af65a0a37d77be39e22aa886a6132 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Oct 2020 08:33:09 +0200 Subject: swiotlb: remove the tbl_dma_addr argument to swiotlb_tbl_map_single The tbl_dma_addr argument is used to check the DMA boundary for the allocations, and thus needs to be a dma_addr_t. swiotlb-xen instead passed a physical address, which could lead to incorrect results for strange offsets. Fix this by removing the parameter entirely and hard code the DMA address for io_tlb_start instead. Fixes: 91ffe4ad534a ("swiotlb-xen: introduce phys_to_dma/dma_to_phys translations") Signed-off-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 513913ff7486..3bb72266a75a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,13 +45,9 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t phys, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs); +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, -- cgit v1.2.3 From e0e398e204634db8fb71bd89cf2f6e3e5bd09b51 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:12:15 +0200 Subject: PM: runtime: Drop runtime PM references to supplier on link removal While removing a device link, drop the supplier device's runtime PM usage counter as many times as needed to drop all of the runtime PM references to it from the consumer in addition to dropping the consumer's link count. Fixes: baa8809f6097 ("PM / runtime: Optimize the use of device links") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_runtime.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 18b02dcc168e..eadc1fdebce6 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,7 +58,7 @@ extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); -extern void pm_runtime_drop_link(struct device *dev); +extern void pm_runtime_drop_link(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. @@ -280,7 +280,7 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -static inline void pm_runtime_drop_link(struct device *dev) {} +static inline void pm_runtime_drop_link(struct device_link *link) {} #endif /* !CONFIG_PM */ -- cgit v1.2.3 From d6e36668598154820177bfd78c1621d8e6c580a2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:13:10 +0200 Subject: PM: runtime: Drop pm_runtime_clean_up_links() After commit d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") nothing prevents the consumer device's runtime PM from acquiring additional references to the supplier device after pm_runtime_clean_up_links() has run (or even while it is running), so calling this function from __device_release_driver() may be pointless (or even harmful). Moreover, it ignores stateless device links, so the runtime PM handling of managed and stateless device links is inconsistent because of it, so better get rid of it entirely. Fixes: d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_runtime.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index eadc1fdebce6..4b708f4e8eed 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -54,7 +54,6 @@ extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); -extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); @@ -276,7 +275,6 @@ static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} -static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -- cgit v1.2.3 From 56a7ff75cd08987812209971e319f78156ea2bb1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Oct 2020 13:57:45 +0200 Subject: cpufreq: Drop restore_freq from struct cpufreq_policy The restore_freq field in struct cpufreq_policy is only used by __target_index() in one place and a local variable in that function may as well be used instead of it, so drop it and modify __target_index() accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9779a6cd8baa 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,6 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ - unsigned int restore_freq; /* = policy->cur before transition */ unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ @@ -308,10 +307,6 @@ struct cpufreq_driver { /* define one out of two */ int (*setpolicy)(struct cpufreq_policy *policy); - /* - * On failure, should always restore frequency to policy->restore_freq - * (i.e. old freq). - */ int (*target)(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); /* Deprecated */ -- cgit v1.2.3 From b000d5cb954fe25ac1ea929ae6da321033ace927 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Oct 2020 10:18:04 +0200 Subject: ima: defer arch_ima_get_secureboot() call to IMA init time Chester reports that it is necessary to introduce a new way to pass the EFI secure boot status between the EFI stub and the core kernel on ARM systems. The usual way of obtaining this information is by checking the SecureBoot and SetupMode EFI variables, but this can only be done after the EFI variable workqueue is created, which occurs in a subsys_initcall(), whereas arch_ima_get_secureboot() is called much earlier by the IMA framework. However, the IMA framework itself is started as a late_initcall, and the only reason the call to arch_ima_get_secureboot() occurs so early is because it happens in the context of a __setup() callback that parses the ima_appraise= command line parameter. So let's refactor this code a little bit, by using a core_param() callback to capture the command line argument, and deferring any reasoning based on its contents to the IMA init routine. Cc: Chester Lin Cc: Dmitry Kasatkin Cc: James Morris Cc: "Serge E. Hallyn" Link: https://lore.kernel.org/linux-arm-kernel/20200904072905.25332-2-clin@suse.com/ Signed-off-by: Ard Biesheuvel Reported-by: kernel test robot [missing core_param()] [zohar@linux.ibm.com: included linux/module.h] Tested-by: Chester Lin Signed-off-by: Mimi Zohar --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 8fa7bcfb2da2..ac3d82f962f2 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -31,6 +31,12 @@ extern void ima_post_path_mknod(struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); +#ifdef CONFIG_IMA_APPRAISE_BOOTPARAM +extern void ima_appraise_parse_cmdline(void); +#else +static inline void ima_appraise_parse_cmdline(void) {} +#endif + #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); #endif -- cgit v1.2.3 From 24269999027e6b161c0078ad9c1557f9a1575128 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:32:57 +0200 Subject: EDAC: Fix some kernel-doc markups Kernel-doc markup should use this format: identifier - description Correct that and also fix some enums' names in the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/1d291393ba58c7b80908a3fedf02d2f53921ffe9.1603469755.git.mchehab+huawei@kernel.org --- include/linux/edac.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 15e8f3d8a895..52d7487f6bd4 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -229,7 +229,7 @@ enum mem_type { #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** - * enum edac-type - Error Detection and Correction capabilities and mode + * enum edac_type - Error Detection and Correction capabilities and mode * @EDAC_UNKNOWN: Unknown if ECC is available * @EDAC_NONE: Doesn't support ECC * @EDAC_RESERVED: Reserved ECC type @@ -309,7 +309,7 @@ enum scrub_type { #define OP_OFFLINE 0x300 /** - * enum edac_mc_layer - memory controller hierarchy layer + * enum edac_mc_layer_type - memory controller hierarchy layer * * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch" * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" -- cgit v1.2.3 From f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 1 Nov 2020 17:08:00 -0800 Subject: mm: always have io_remap_pfn_range() set pgprot_decrypted() The purpose of io_remap_pfn_range() is to map IO memory, such as a memory mapped IO exposed through a PCI BAR. IO devices do not understand encryption, so this memory must always be decrypted. Automatically call pgprot_decrypted() as part of the generic implementation. This fixes a bug where enabling AMD SME causes subsystems, such as RDMA, using io_remap_pfn_range() to expose BAR pages to user space to fail. The CPU will encrypt access to those BAR pages instead of passing unencrypted IO directly to the device. Places not mapping IO should use remap_pfn_range(). Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Tom Lendacky Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brijesh Singh Cc: Jonathan Corbet Cc: Dmitry Vyukov Cc: "Dave Young" Cc: Alexander Potapenko Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Larry Woodman Cc: Matt Fleming Cc: Ingo Molnar Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Toshimitsu Kani Cc: Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ include/linux/pgtable.h | 4 ---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef360fe70aaf..db6ae4d3fb4e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2759,6 +2759,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 38c33eabea89..71125a4676c4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,10 +1427,6 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit v1.2.3 From 9f14cb030d987ae5e201e88cd345c6d772bcce51 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:22 -0700 Subject: sched: Un-hide lockdep_tasklist_lock_is_held() for !LOCKDEP Currently, variables used only within lockdep expressions are flagged as unused, requiring that these variables' declarations be decorated with either #ifdef or __maybe_unused. This results in ugly code. This commit therefore causes the lockdep_tasklist_lock_is_held() function to be visible even when lockdep is not enabled, thus removing the need for these decorations. This approach further relies on dead-code elimination to remove any references to functions or variables that are not available in non-lockdep kernels. Signed-off-by: Jakub Kicinski Signed-off-by: Paul E. McKenney --- include/linux/sched/task.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 85fb2f34c59b..c0f71f2e7160 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -47,9 +47,7 @@ extern spinlock_t mmlist_lock; extern union thread_union init_thread_union; extern struct task_struct init_task; -#ifdef CONFIG_PROVE_RCU extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); -- cgit v1.2.3 From 891cd1f99dd94746f0caf5eea0121079178ee9bf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:23 -0700 Subject: rcu: Un-hide lockdep maps for !LOCKDEP Currently, variables used only within lockdep expressions are flagged as unused, requiring that these variables' declarations be decorated with either #ifdef or __maybe_unused. This results in ugly code. This commit therefore causes the RCU lock maps to be visible even when lockdep is not enabled, thus removing the need for these decorations. This approach further relies on dead-code elimination to remove any references to functions or variables that are not available in non-lockdep kernels. Signed-off-by: Jakub Kicinski Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 9 +++++---- include/linux/rcupdate_trace.h | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdd0152c253..f9533bbcbb36 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -241,6 +241,11 @@ bool rcu_lockdep_current_cpu_online(void); static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ +extern struct lockdep_map rcu_lock_map; +extern struct lockdep_map rcu_bh_lock_map; +extern struct lockdep_map rcu_sched_lock_map; +extern struct lockdep_map rcu_callback_map; + #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void rcu_lock_acquire(struct lockdep_map *map) @@ -253,10 +258,6 @@ static inline void rcu_lock_release(struct lockdep_map *map) lock_release(map, _THIS_IP_); } -extern struct lockdep_map rcu_lock_map; -extern struct lockdep_map rcu_bh_lock_map; -extern struct lockdep_map rcu_sched_lock_map; -extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 3e7919fc5f34..86c8f6c98412 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -11,10 +11,10 @@ #include #include -#ifdef CONFIG_DEBUG_LOCK_ALLOC - extern struct lockdep_map rcu_trace_lock_map; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + static inline int rcu_read_lock_trace_held(void) { return lock_is_held(&rcu_trace_lock_map); -- cgit v1.2.3 From cd539cff9470fe1dacf0bf5ab3f54f37b854d6fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:27 -0700 Subject: lockdep: Provide dummy forward declaration of *_is_held() helpers When CONFIG_LOCKDEP is not set, lock_is_held() and lockdep_is_held() are not declared or defined. This forces all callers to use #ifdefs around these checks. Recent RCU changes added a lot of lockdep_is_held() calls inside rcu_dereference_protected(). This macro hides its argument on !LOCKDEP builds, which can lead to false-positive unused-variable warnings. This commit therefore provides forward declarations of lock_is_held() and lockdep_is_held() but without defining them. This way callers (including those internal to RCU) can keep them visible to the compiler on !LOCKDEP builds and instead depend on dead code elimination to remove the references, which in turn prevents the linker from complaining about the lack of the corresponding function definitions. [ paulmck: Apply Peter Zijlstra feedback on "extern". ] Signed-off-by: Jakub Kicinski -- CC: peterz@infradead.org CC: mingo@redhat.com CC: will@kernel.org Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f5594879175a..ccc3ce66c7e0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -375,6 +375,12 @@ static inline void lockdep_unregister_key(struct lock_class_key *key) #define lockdep_depth(tsk) (0) +/* + * Dummy forward declarations, allow users to write less ifdef-y code + * and depend on dead code elimination. + */ +extern int lock_is_held(const void *); +extern int lockdep_is_held(const void *); #define lockdep_is_held_type(l, r) (1) #define lockdep_assert_held(l) do { (void)(l); } while (0) -- cgit v1.2.3 From 65e9eb1ccfe56b41a0d8bfec651ea014968413cb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:28 -0700 Subject: rcu: Prevent RCU_LOCKDEP_WARN() from swallowing the condition We run into a unused variable warning in bridge code when variable is only used inside the condition of rcu_dereference_protected(). #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) Since on builds with CONFIG_PROVE_RCU=n rcu_dereference_protected() compiles to nothing the compiler doesn't see the variable use. This commit therefore prevents this warning by adding the condition as dead code. Signed-off-by: Jakub Kicinski -- CC: paulmck@kernel.org CC: josh@joshtriplett.org CC: rostedt@goodmis.org CC: mathieu.desnoyers@efficios.com CC: joel@joelfernandes.org CC: jiangshanlai@gmail.com Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f9533bbcbb36..de0826411311 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -328,7 +328,7 @@ static inline void rcu_preempt_sleep_check(void) { } #else /* #ifdef CONFIG_PROVE_RCU */ -#define RCU_LOCKDEP_WARN(c, s) do { } while (0) +#define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ -- cgit v1.2.3 From 6370cc3bbd8a0f9bf975b013781243ab147876c6 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 29 Oct 2020 17:36:19 +0000 Subject: net: add kcov handle to skb extensions Remote KCOV coverage collection enables coverage-guided fuzzing of the code that is not reachable during normal system call execution. It is especially helpful for fuzzing networking subsystems, where it is common to perform packet handling in separate work queues even for the packets that originated directly from the user space. Enable coverage-guided frame injection by adding kcov remote handle to skb extensions. Default initialization in __alloc_skb and __build_skb_around ensures that no socket buffer that was generated during a system call will be missed. Code that is of interest and that performs packet processing should be annotated with kcov_remote_start()/kcov_remote_stop(). An alternative approach is to determine kcov_handle solely on the basis of the device/interface that received the specific socket buffer. However, in this case it would be impossible to distinguish between packets that originated during normal background network processes or were intentionally injected from the user space. Signed-off-by: Aleksandr Nogikh Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a828cf99c521..2d01b2bbb746 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4150,6 +4150,9 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, +#endif +#if IS_ENABLED(CONFIG_KCOV) + SKB_EXT_KCOV_HANDLE, #endif SKB_EXT_NUM, /* must be last */ }; @@ -4605,5 +4608,35 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } +#ifdef CONFIG_KCOV +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) +{ + /* Do not allocate skb extensions only to set kcov_handle to zero + * (as it is zero by default). However, if the extensions are + * already allocated, update kcov_handle anyway since + * skb_set_kcov_handle can be called to zero a previously set + * value. + */ + if (skb_has_extensions(skb) || kcov_handle) { + u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); + + if (kcov_handle_ptr) + *kcov_handle_ptr = kcov_handle; + } +} + +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) +{ + u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); + + return kcov_handle ? *kcov_handle : 0; +} +#else +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) { } +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } +#endif /* CONFIG_KCOV */ + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 31909e3330c8d65e9a928d40833ebd5feb4f64e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Tue, 27 Oct 2020 21:42:56 +0100 Subject: timens: additional helper functions for boottime offset handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide functions for time_namespace to subtract the boottime offset from a timespec64 as well as to apply the boottime offset to u64 types in nanoseconds. Signed-off-by: Michael Weiß Reviewed-by: Andrei Vagin Acked-by: Thomas Gleixner Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20201027204258.7869-2-michael.weiss@aisec.fraunhofer.de --- include/linux/time_namespace.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 5b6031385db0..68770ac9ba89 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -77,6 +77,20 @@ static inline void timens_add_boottime(struct timespec64 *ts) *ts = timespec64_add(*ts, ns_offsets->boottime); } +static inline u64 timens_add_boottime_ns(u64 nsec) +{ + struct timens_offsets *ns_offsets = ¤t->nsproxy->time_ns->offsets; + + return nsec + timespec64_to_ns(&ns_offsets->boottime); +} + +static inline void timens_sub_boottime(struct timespec64 *ts) +{ + struct timens_offsets *ns_offsets = ¤t->nsproxy->time_ns->offsets; + + *ts = timespec64_sub(*ts, ns_offsets->boottime); +} + ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, struct timens_offsets *offsets); @@ -130,6 +144,14 @@ static inline int timens_on_fork(struct nsproxy *nsproxy, static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } + +static inline u64 timens_add_boottime_ns(u64 nsec) +{ + return nsec; +} + +static inline void timens_sub_boottime(struct timespec64 *ts) { } + static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) { return tim; -- cgit v1.2.3 From 5190db9fdd20fa5ba6084c98a3bc71c2fdf6a871 Mon Sep 17 00:00:00 2001 From: Roman Anufriev Date: Sun, 18 Oct 2020 05:56:54 +0300 Subject: fs/quota: update quota state flags scheme with project quota flags Current quota state flags scheme doesn't include project quota and thus shows all flags after DQUOT_USAGE_ENABLED wrong. Fix this and also add DQUOT_NOLIST_DIRTY to the scheme. Link: https://lore.kernel.org/r/1602989814-28922-1-git-send-email-dotdot@yandex-team.ru Signed-off-by: Roman Anufriev Signed-off-by: Jan Kara --- include/linux/quota.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 27aab84fcbaa..18ebd39c9487 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -448,17 +448,18 @@ struct quota_format_type { }; /** - * Quota state flags - they actually come in two flavors - for users and groups. + * Quota state flags - they come in three flavors - for users, groups and projects. * * Actual typed flags layout: - * USRQUOTA GRPQUOTA - * DQUOT_USAGE_ENABLED 0x0001 0x0002 - * DQUOT_LIMITS_ENABLED 0x0004 0x0008 - * DQUOT_SUSPENDED 0x0010 0x0020 + * USRQUOTA GRPQUOTA PRJQUOTA + * DQUOT_USAGE_ENABLED 0x0001 0x0002 0x0004 + * DQUOT_LIMITS_ENABLED 0x0008 0x0010 0x0020 + * DQUOT_SUSPENDED 0x0040 0x0080 0x0100 * * Following bits are used for non-typed flags: - * DQUOT_QUOTA_SYS_FILE 0x0040 - * DQUOT_NEGATIVE_USAGE 0x0080 + * DQUOT_QUOTA_SYS_FILE 0x0200 + * DQUOT_NEGATIVE_USAGE 0x0400 + * DQUOT_NOLIST_DIRTY 0x0800 */ enum { _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ -- cgit v1.2.3 From 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 18 Dec 2019 09:39:02 +0100 Subject: can: can_create_echo_skb(): fix echo skb generation: always use skb_clone() All user space generated SKBs are owned by a socket (unless injected into the key via AF_PACKET). If a socket is closed, all associated skbs will be cleaned up. This leads to a problem when a CAN driver calls can_put_echo_skb() on a unshared SKB. If the socket is closed prior to the TX complete handler, can_get_echo_skb() and the subsequent delivering of the echo SKB to all registered callbacks, a SKB with a refcount of 0 is delivered. To avoid the problem, in can_get_echo_skb() the original SKB is now always cloned, regardless of shared SKB or not. If the process exists it can now safely discard its SKBs, without disturbing the delivery of the echo SKB. The problem shows up in the j1939 stack, when it clones the incoming skb, which detects the already 0 refcount. We can easily reproduce this with following example: testj1939 -B -r can0: & cansend can0 1823ff40#0123 WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174 refcount_t: addition on 0; use-after-free. Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x8c/0xa0) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0xa8/0xcc) [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x108/0x174) [] (refcount_warn_saturate) from [] (j1939_can_recv+0x20c/0x210) [] (j1939_can_recv) from [] (can_rcv_filter+0xb4/0x268) [] (can_rcv_filter) from [] (can_receive+0xb0/0xe4) [] (can_receive) from [] (can_rcv+0x48/0x98) [] (can_rcv) from [] (__netif_receive_skb_one_core+0x64/0x88) [] (__netif_receive_skb_one_core) from [] (__netif_receive_skb+0x38/0x94) [] (__netif_receive_skb) from [] (netif_receive_skb_internal+0x64/0xf8) [] (netif_receive_skb_internal) from [] (netif_receive_skb+0x34/0x19c) [] (netif_receive_skb) from [] (can_rx_offload_napi_poll+0x58/0xb4) Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Signed-off-by: Oleksij Rempel Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 900b9f4e0605..fc61cf4eff1c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *nskb; - if (likely(nskb)) { - can_skb_set_owner(nskb, skb->sk); - consume_skb(skb); - return nskb; - } else { - kfree_skb(skb); - return NULL; - } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + kfree_skb(skb); + return NULL; } - /* we can assume to have an unshared skb with proper owner */ - return skb; + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; } #endif /* !_CAN_SKB_H */ -- cgit v1.2.3 From 2e4ef10f58502323ea470bc30ba84d5ddd4e77f0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 1 Nov 2020 13:17:07 +0000 Subject: net: add GSO UDP L4 and GSO fraglists to the list of software-backed types Commit e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") and commit 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") made UDP L4 and fraglisted GRO/GSO fully supported by the software fallback mode. We can safely add them to NETIF_F_GSO_SOFTWARE to allow logical/virtual netdevs to forward these types of skbs up to the real drivers. Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 0b17c4322b09..934de56644e7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -207,8 +207,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ - NETIF_F_GSO_SCTP) +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ + NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) /* * If one device supports one of these features, then enable them -- cgit v1.2.3 From 179dfb954790410f65605f1c479c029c2cd73c55 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Oct 2020 11:44:21 +0100 Subject: USB: serial: remove write wait queue The digi_acceleport driver is the only driver still using the port write wake queue so move it to that driver's port data. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 8e67eff9039f..1c09b922f8b0 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -62,7 +62,6 @@ * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. * @flags: usb serial port flags - * @write_wait: a wait_queue_head_t used by the port. * @work: work queue entry for the line discipline waking up. * @dev: pointer to the serial device * @@ -108,7 +107,6 @@ struct usb_serial_port { int tx_bytes; unsigned long flags; - wait_queue_head_t write_wait; struct work_struct work; unsigned long sysrq; /* sysrq timeout */ struct device dev; -- cgit v1.2.3 From 2374a045263b47f763571ec87ad7c65ea505188a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 29 Oct 2020 12:32:18 +0100 Subject: vt: keyboard, remove unneeded func_* declarations Now that we removed the ugly handling of func_buf, remove unneeded declarations of func_* variables. The definitions are in the generated defkeymap.c_shipped, so we cannot really remove them (but we would love to). Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20201029113222.32640-13-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/kbd_kern.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index bb2246c8ec13..82f29aa35062 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -9,9 +9,6 @@ extern struct tasklet_struct keyboard_tasklet; extern char *func_table[MAX_NR_FUNC]; -extern char func_buf[]; -extern char *funcbufptr; -extern int funcbufsize, funcbufleft; /* * kbd->xxx contains the VC-local things (flag settings etc..) -- cgit v1.2.3 From 763e4cdc0f6d5cea45c896fef67f7be4bdefcca7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 29 Oct 2020 14:30:48 -0700 Subject: iomap: support partial page discard on writeback block mapping failure iomap writeback mapping failure only calls into ->discard_page() if the current page has not been added to the ioend. Accordingly, the XFS callback assumes a full page discard and invalidation. This is problematic for sub-page block size filesystems where some portion of a page might have been mapped successfully before a failure to map a delalloc block occurs. ->discard_page() is not called in that error scenario and the bio is explicitly failed by iomap via the error return from ->prepare_ioend(). As a result, the filesystem leaks delalloc blocks and corrupts the filesystem block counters. Since XFS is the only user of ->discard_page(), tweak the semantics to invoke the callback unconditionally on mapping errors and provide the file offset that failed to map. Update xfs_discard_page() to discard the corresponding portion of the file and pass the range along to iomap_invalidatepage(). The latter already properly handles both full and sub-page scenarios by not changing any iomap or page state on sub-page invalidations. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 172b3397a1a3..5bd3cac4df9c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -221,7 +221,7 @@ struct iomap_writeback_ops { * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ - void (*discard_page)(struct page *page); + void (*discard_page)(struct page *page, loff_t fileoff); }; struct iomap_writepage_ctx { -- cgit v1.2.3 From fdaf083cdfb556a45c422c8998268baf1ab26829 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Oct 2020 09:37:30 -0600 Subject: io_uring: properly handle SQPOLL request cancelations Track if a given task io_uring context contains SQPOLL instances, so we can iterate those for cancelation (and request counts). This ensures that we properly wait on SQPOLL contexts, and find everything that needs canceling. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 868364cea3b7..35b2d845704d 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -30,7 +30,8 @@ struct io_uring_task { struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; - bool in_idle; + atomic_t in_idle; + bool sqpoll; }; #if defined(CONFIG_IO_URING) -- cgit v1.2.3 From b6be002bcd1dd1dedb926abf3c90c794eacb77dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Nov 2020 12:53:16 -0800 Subject: x86/entry: Move nmi entry/exit into common code Lockdep state handling on NMI enter and exit is nothing specific to X86. It's not any different on other architectures. Also the extra state type is not necessary, irqentry_state_t can carry the necessary information as well. Move it to common code and extend irqentry_state_t to carry lockdep state. [ Ira: Make exit_rcu and lockdep a union as they are mutually exclusive between the IRQ and NMI exceptions, and add kernel documentation for struct irqentry_state_t ] Signed-off-by: Thomas Gleixner Signed-off-by: Ira Weiny Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201102205320.1458656-7-ira.weiny@intel.com --- include/linux/entry-common.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b9711e813ec2..1a128baf3628 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -346,8 +346,26 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs); void irqentry_exit_to_user_mode(struct pt_regs *regs); #ifndef irqentry_state +/** + * struct irqentry_state - Opaque object for exception state storage + * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the + * exit path has to invoke rcu_irq_exit(). + * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that + * lockdep state is restored correctly on exit from nmi. + * + * This opaque object is filled in by the irqentry_*_enter() functions and + * must be passed back into the corresponding irqentry_*_exit() functions + * when the exception is complete. + * + * Callers of irqentry_*_[enter|exit]() must consider this structure opaque + * and all members private. Descriptions of the members are provided to aid in + * the maintenance of the irqentry_*() functions. + */ typedef struct irqentry_state { - bool exit_rcu; + union { + bool exit_rcu; + bool lockdep; + }; } irqentry_state_t; #endif @@ -407,4 +425,23 @@ void irqentry_exit_cond_resched(void); */ void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); +/** + * irqentry_nmi_enter - Handle NMI entry + * @regs: Pointer to currents pt_regs + * + * Similar to irqentry_enter() but taking care of the NMI constraints. + */ +irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); + +/** + * irqentry_nmi_exit - Handle return from NMI handling + * @regs: Pointer to pt_regs (NMI entry regs) + * @irq_state: Return value from matching call to irqentry_nmi_enter() + * + * Last action before returning to the low level assmenbly code. + * + * Counterpart to irqentry_nmi_enter(). + */ +void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); + #endif -- cgit v1.2.3 From e1ac4b2406d94eddce8ac2c5ab4235f6075a9602 Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 30 Oct 2020 14:08:38 +0800 Subject: efi: generalize efi_get_secureboot Generalize the efi_get_secureboot() function so not only efistub but also other subsystems can use it. Note that the MokSbState handling is not factored out: the variable is boot time only, and so it cannot be parameterized as easily. Also, the IMA code will switch to this version in a future patch, and it does not incorporate the MokSbState exception in the first place. Note that the new efi_get_secureboot_mode() helper treats any failures to read SetupMode as setup mode being disabled. Co-developed-by: Chester Lin Signed-off-by: Chester Lin Acked-by: Mimi Zohar Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d7c0e73af2b9..1cd5d91d8ca1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1089,7 +1089,28 @@ enum efi_secureboot_mode { efi_secureboot_mode_disabled, efi_secureboot_mode_enabled, }; -enum efi_secureboot_mode efi_get_secureboot(void); + +static inline +enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) +{ + u8 secboot, setupmode = 0; + efi_status_t status; + unsigned long size; + + size = sizeof(secboot); + status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, + &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; + if (status != EFI_SUCCESS) + return efi_secureboot_mode_unknown; + + size = sizeof(setupmode); + get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); + if (secboot == 0 || setupmode == 1) + return efi_secureboot_mode_disabled; + return efi_secureboot_mode_enabled; +} #ifdef CONFIG_RESET_ATTACK_MITIGATION void efi_enable_reset_attack_mitigation(void); -- cgit v1.2.3 From 9d1c94a69d70f1b02bdf06b231cd16ad47ef06cd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:25 +0200 Subject: clk: fix a kernel-doc markup clk_get_duty_cycle -> clk_get_scaled_duty_cycle Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/b2336f3f3cdfe6e1a2d3a7a056ab7ccc7a81b945.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Stephen Boyd --- include/linux/clk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4..5f8d5f4931c0 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -150,7 +150,7 @@ int clk_get_phase(struct clk *clk); int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den); /** - * clk_get_duty_cycle - return the duty cycle ratio of a clock signal + * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal * @clk: clock signal source * @scale: scaling factor to be applied to represent the ratio as an integer * -- cgit v1.2.3 From 2b5b95b1ff3d70a95013a45e3b5b90f1daf42348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Sep 2020 15:09:33 +0200 Subject: mm: introduce vma_set_file function v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new vma_set_file() function to allow changing vma->vm_file with the necessary refcount dance. v2: add more users of this. v3: add missing EXPORT_SYMBOL, rebase on mmap cleanup, add comments why we drop the reference on two occasions. v4: make it clear that changing an anonymous vma is illegal. Signed-off-by: Christian König Reviewed-by: Daniel Vetter (v2) Reviewed-by: Jason Gunthorpe Link: https://patchwork.freedesktop.org/patch/394773/ --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef360fe70aaf..2b7ac36c42dd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2719,6 +2719,8 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif +void vma_set_file(struct vm_area_struct *vma, struct file *file); + #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From a4da45dda6475816f4c8b9e0d512261991ba31e5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 28 Oct 2020 15:51:17 +0100 Subject: pinctrl: Remove hole in pinctrl_gpio_range On 64-bit platforms, pointer size and alignment are 64-bit, hence two 4-byte holes are present before the pins and gc members of the pinctrl_gpio_range structure. Get rid of these holes by moving the pins pointer. This reduces kernel size of an arm64 Rockchip kernel by ca. 512 bytes. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201028145117.1731876-1-geert+renesas@glider.be Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 2aef59df93d7..70b45d28e7a9 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -51,8 +51,8 @@ struct pinctrl_pin_desc { * @id: an ID number for the chip in this range * @base: base offset of the GPIO range * @pin_base: base pin number of the GPIO range if pins == NULL - * @pins: enumeration of pins in GPIO range or NULL * @npins: number of pins in the GPIO range, including the base number + * @pins: enumeration of pins in GPIO range or NULL * @gc: an optional pointer to a gpio_chip */ struct pinctrl_gpio_range { @@ -61,8 +61,8 @@ struct pinctrl_gpio_range { unsigned int id; unsigned int base; unsigned int pin_base; - unsigned const *pins; unsigned int npins; + unsigned const *pins; struct gpio_chip *gc; }; -- cgit v1.2.3 From e40b0b56ffdc16edce207904a7782da6a1a47162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Nov 2020 17:05:35 +0100 Subject: Revert "mm: introduce vma_set_file function v4" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel test robot is not happy with that. This reverts commit 2b5b95b1ff3d70a95013a45e3b5b90f1daf42348. Signed-off-by: Christian König Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/394773/ --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2b7ac36c42dd..ef360fe70aaf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2719,8 +2719,6 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif -void vma_set_file(struct vm_area_struct *vma, struct file *file); - #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 3b20369313a486246582c8ef6ff5d1d0b9c34613 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 5 Nov 2020 15:48:51 +0800 Subject: EDAC: Add three new memory types There are {Low-Power DDR3/4, WIO2} types of memory. Add new entries to 'enum mem_type' and new strings to 'edac_mem_types[]' for the new types. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck --- include/linux/edac.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 15e8f3d8a895..8f63245f7f7c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -175,11 +175,14 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. * @MEM_LRDDR3: Load-Reduced DDR3 memory. + * @MEM_LPDDR3: Low-Power DDR3 memory. * @MEM_DDR4: Unbuffered DDR4 RAM * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. + * @MEM_LPDDR4: Low-Power DDR4 memory. * @MEM_NVDIMM: Non-volatile RAM + * @MEM_WIO2: Wide I/O 2. */ enum mem_type { MEM_EMPTY = 0, @@ -200,10 +203,13 @@ enum mem_type { MEM_DDR3, MEM_RDDR3, MEM_LRDDR3, + MEM_LPDDR3, MEM_DDR4, MEM_RDDR4, MEM_LRDDR4, + MEM_LPDDR4, MEM_NVDIMM, + MEM_WIO2, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -223,10 +229,13 @@ enum mem_type { #define MEM_FLAG_XDR BIT(MEM_XDR) #define MEM_FLAG_DDR3 BIT(MEM_DDR3) #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) +#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_WIO2 BIT(MEM_WIO2) /** * enum edac-type - Error Detection and Correction capabilities and mode -- cgit v1.2.3 From e4b27ebc780fa7fa951d81d241912755532568ff Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 3 Nov 2020 08:10:56 +0100 Subject: net: dsa: Add DSA driver for Hirschmann Hellcreek switches Add a basic DSA driver for Hirschmann Hellcreek switches. Those switches are implementing features needed for Time Sensitive Networking (TSN) such as support for the Time Precision Protocol and various shapers like the Time Aware Shaper. This driver includes basic support for networking: * VLAN handling * FDB handling * Port statistics * STP * Phylink Signed-off-by: Kurt Kanzenbach Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/platform_data/hirschmann-hellcreek.h | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/platform_data/hirschmann-hellcreek.h (limited to 'include/linux') diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h new file mode 100644 index 000000000000..388846766bb2 --- /dev/null +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* + * Hirschmann Hellcreek TSN switch platform data. + * + * Copyright (C) 2020 Linutronix GmbH + * Author Kurt Kanzenbach + */ + +#ifndef _HIRSCHMANN_HELLCREEK_H_ +#define _HIRSCHMANN_HELLCREEK_H_ + +#include + +struct hellcreek_platform_data { + int num_ports; /* Amount of switch ports */ + int is_100_mbits; /* Is it configured to 100 or 1000 mbit/s */ + int qbv_support; /* Qbv support on front TSN ports */ + int qbv_on_cpu_port; /* Qbv support on the CPU port */ + int qbu_support; /* Qbu support on front TSN ports */ + u16 module_id; /* Module identificaton */ +}; + +#endif /* _HIRSCHMANN_HELLCREEK_H_ */ -- cgit v1.2.3 From 293e9a3d950dfebc76d9fa6931e6f91ef856b9ab Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 1 Nov 2020 14:50:56 +0200 Subject: net: phy: export phy_error and phy_trigger_machine These functions are currently used by phy_interrupt() to either signal an error condition or to trigger the link state machine. In an attempt to actually support shared PHY IRQs, export these two functions so that the actual PHY drivers can use them. Cc: Alexandru Ardelean Cc: Andre Edich Cc: Antoine Tenart Cc: Baruch Siach Cc: Christophe Leroy Cc: Dan Murphy Cc: Divya Koppera Cc: Florian Fainelli Cc: Hauke Mehrtens Cc: Heiner Kallweit Cc: Jerome Brunet Cc: Kavya Sree Kotagiri Cc: Linus Walleij Cc: Marco Felsch Cc: Marek Vasut Cc: Martin Blumenstingl Cc: Mathias Kresin Cc: Maxim Kochetkov Cc: Michael Walle Cc: Neil Armstrong Cc: Nisar Sayed Cc: Oleksij Rempel Cc: Philippe Schenker Cc: Willy Liu Cc: Yuiko Oshino Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index eb3cb1a98b45..566b39f6cd64 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1570,8 +1570,10 @@ void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); +void phy_error(struct phy_device *phydev); void phy_state_machine(struct work_struct *work); void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); +void phy_trigger_machine(struct phy_device *phydev); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From 87de1f058aacc8ce4be94e36a38f77b860914a76 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 1 Nov 2020 14:51:12 +0200 Subject: net: phy: add genphy_handle_interrupt_no_ack() It seems there are cases where the interrupts are handled by another entity (ie an IRQ controller embedded inside the PHY) and do not need any other interraction from phylib. For this kind of PHYs, like the RTL8366RB, add the genphy_handle_interrupt_no_ack() function which just triggers the link state machine. Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 566b39f6cd64..4f158d6352ae 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1510,6 +1510,7 @@ int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_loopback(struct phy_device *phydev, bool enable); int genphy_soft_reset(struct phy_device *phydev); +irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev); static inline int genphy_config_aneg(struct phy_device *phydev) { -- cgit v1.2.3 From d8c4a22363853e196497b06a8ee9be9773873a14 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 3 Nov 2020 18:23:53 +0100 Subject: bus: mhi: Add mhi_queue_is_full function This function can be used by client driver to determine whether it's possible to queue new elements in a channel ring. Signed-off-by: Loic Poulain Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1604424234-24446-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Jakub Kicinski --- include/linux/mhi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..b9bb7dff32e2 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -737,4 +737,11 @@ int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir, int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir, struct sk_buff *skb, size_t len, enum mhi_flags mflags); +/** + * mhi_queue_is_full - Determine whether queueing new elements is possible + * @mhi_dev: Device associated with the channels + * @dir: DMA direction for the channel + */ +bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From c1aedf015ebdd0232757a66e2daccf1246bd609c Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 4 Nov 2020 10:19:22 +0800 Subject: net/usb/r8153_ecm: support ECM mode for RTL8153 Support ECM mode based on cdc_ether with relative mii functions, when CONFIG_USB_RTL8152 is not set, or the device is not supported by r8152 driver. Both r8152 and r8153_ecm would check the return value of rtl8152_get_version() in porbe(). If rtl8152_get_version() return none zero value, the r8152 is used for the device with vendor mode. Otherwise, the r8153_ecm is used for the device with ECM mode. Signed-off-by: Hayes Wang Link: https://lore.kernel.org/r/1394712342-15778-392-Taiwan-albertk@realtek.com Signed-off-by: Jakub Kicinski --- include/linux/usb/r8152.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/usb/r8152.h (limited to 'include/linux') diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h new file mode 100644 index 000000000000..20d88b1defc3 --- /dev/null +++ b/include/linux/usb/r8152.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 Realtek Semiconductor Corp. All rights reserved. + */ + +#ifndef __LINUX_R8152_H +#define __LINUX_R8152_H + +#define RTL8152_REQT_READ 0xc0 +#define RTL8152_REQT_WRITE 0x40 +#define RTL8152_REQ_GET_REGS 0x05 +#define RTL8152_REQ_SET_REGS 0x05 + +#define BYTE_EN_DWORD 0xff +#define BYTE_EN_WORD 0x33 +#define BYTE_EN_BYTE 0x11 +#define BYTE_EN_SIX_BYTES 0x3f +#define BYTE_EN_START_MASK 0x0f +#define BYTE_EN_END_MASK 0xf0 + +#define MCU_TYPE_PLA 0x0100 +#define MCU_TYPE_USB 0x0000 + +/* Define these values to match your device */ +#define VENDOR_ID_REALTEK 0x0bda +#define VENDOR_ID_MICROSOFT 0x045e +#define VENDOR_ID_SAMSUNG 0x04e8 +#define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_LINKSYS 0x13b1 +#define VENDOR_ID_NVIDIA 0x0955 +#define VENDOR_ID_TPLINK 0x2357 + +#if IS_REACHABLE(CONFIG_USB_RTL8152) +extern u8 rtl8152_get_version(struct usb_interface *intf); +#endif + +#endif /* __LINUX_R8152_H */ -- cgit v1.2.3 From 8280c07e0762ba753876c427584a792e86f3f7e7 Mon Sep 17 00:00:00 2001 From: Kurt Lee Date: Mon, 12 Oct 2020 03:43:46 -0500 Subject: ieee80211: Add definition for WFA DPP Add Wi-Fi Alliance definition for DPP (Device Provisioning Protocol). Signed-off-by: Kurt Lee Signed-off-by: Wright Feng Link: https://lore.kernel.org/r/20201012084347.121557-2-wright.feng@cypress.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 770408b2fdaf..5e8cc9c3d45a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3417,6 +3417,8 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) #define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) +#define WLAN_AKM_SUITE_WFA_DPP SUITE(WLAN_OUI_WFA, 2) + #define WLAN_MAX_KEY_LEN 32 #define WLAN_PMK_NAME_LEN 16 @@ -3427,6 +3429,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 +#define WLAN_OUI_TYPE_WFA_DPP 0x1A #define WLAN_OUI_MICROSOFT 0x0050f2 #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 -- cgit v1.2.3 From aec51036a166a0aecc26cba101bfbbdc53d64139 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 4 Nov 2020 19:35:17 +0000 Subject: tty: tty_io: Move 'tty_sysctl_init's prototype to shared space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/tty/tty_ldisc.c:883:6: warning: no previous prototype for ‘tty_sysctl_init’ [-Wmissing-prototypes] 883 | void tty_sysctl_init(void) | ^~~~~~~~~~~~~~~ Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Nick Holloway Cc: -- Cc: Marko Kohtala Cc: Bill Hawes Cc: "C. Scott Ananian" Cc: Russell King Cc: Andrew Morton Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20201104193549.4026187-5-lee.jones@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index a99e9b8e4e31..10212c6e4345 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -716,6 +716,7 @@ extern int __must_check tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); +extern void tty_sysctl_init(void); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); -- cgit v1.2.3 From 0264c8c9e1b53e9dbb41fae5e54756e84644bc60 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:36 -0500 Subject: ftrace: Move the recursion testing into global headers Currently, if a callback is registered to a ftrace function and its ftrace_ops does not have the RECURSION flag set, it is encapsulated in a helper function that does the recursion for it. Really, all the callbacks should have their own recursion protection for performance reasons. But they should not all implement their own. Move the recursion helpers to global headers, so that all callbacks can use them. Link: https://lkml.kernel.org/r/20201028115612.460535535@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.166456258@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 1 + include/linux/trace_recursion.h | 187 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 include/linux/trace_recursion.h (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1bd3a0356ae4..0e4164a7f56d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -7,6 +7,7 @@ #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H +#include #include #include #include diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h new file mode 100644 index 000000000000..dbb7b6d4c94c --- /dev/null +++ b/include/linux/trace_recursion.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TRACE_RECURSION_H +#define _LINUX_TRACE_RECURSION_H + +#include +#include + +#ifdef CONFIG_TRACING + +/* Only current can touch trace_recursion */ + +/* + * For function tracing recursion: + * The order of these bits are important. + * + * When function tracing occurs, the following steps are made: + * If arch does not support a ftrace feature: + * call internal function (uses INTERNAL bits) which calls... + * If callback is registered to the "global" list, the list + * function is called and recursion checks the GLOBAL bits. + * then this function calls... + * The function callback, which can use the FTRACE bits to + * check for recursion. + * + * Now if the arch does not support a feature, and it calls + * the global list function which calls the ftrace callback + * all three of these steps will do a recursion protection. + * There's no reason to do one if the previous caller already + * did. The recursion that we are protecting against will + * go through the same steps again. + * + * To prevent the multiple recursion checks, if a recursion + * bit is set that is higher than the MAX bit of the current + * check, then we know that the check was made by the previous + * caller, and we can skip the current check. + */ +enum { + /* Function recursion bits */ + TRACE_FTRACE_BIT, + TRACE_FTRACE_NMI_BIT, + TRACE_FTRACE_IRQ_BIT, + TRACE_FTRACE_SIRQ_BIT, + + /* INTERNAL_BITs must be greater than FTRACE_BITs */ + TRACE_INTERNAL_BIT, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, + + TRACE_BRANCH_BIT, +/* + * Abuse of the trace_recursion. + * As we need a way to maintain state if we are tracing the function + * graph in irq because we want to trace a particular function that + * was called in irq context but we have irq tracing off. Since this + * can only be modified by current, we can reuse trace_recursion. + */ + TRACE_IRQ_BIT, + + /* Set if the function is in the set_graph_function file */ + TRACE_GRAPH_BIT, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_BIT + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, + + /* + * To implement set_graph_notrace, if this bit is set, we ignore + * function graph tracing of called functions, until the return + * function is called to clear it. + */ + TRACE_GRAPH_NOTRACE_BIT, + + /* + * When transitioning between context, the preempt_count() may + * not be correct. Allow for a single recursion to cover this case. + */ + TRACE_TRANSITION_BIT, +}; + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) + +#define trace_recursion_depth() \ + (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) +#define trace_recursion_set_depth(depth) \ + do { \ + current->trace_recursion &= \ + ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ + current->trace_recursion |= \ + ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ + } while (0) + +#define TRACE_CONTEXT_BITS 4 + +#define TRACE_FTRACE_START TRACE_FTRACE_BIT +#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_LIST_START TRACE_INTERNAL_BIT +#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_CONTEXT_MASK TRACE_LIST_MAX + +static __always_inline int trace_get_context_bit(void) +{ + int bit; + + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; + + return bit; +} + +static __always_inline int trace_test_and_set_recursion(int start, int max) +{ + unsigned int val = current->trace_recursion; + int bit; + + /* A previous recursion check was made */ + if ((val & TRACE_CONTEXT_MASK) > max) + return 0; + + bit = trace_get_context_bit() + start; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ + bit = TRACE_TRANSITION_BIT; + if (trace_recursion_test(bit)) + return -1; + trace_recursion_set(bit); + barrier(); + return bit + 1; + } + + /* Normal check passed, clear the transition to allow it again */ + trace_recursion_clear(TRACE_TRANSITION_BIT); + + val |= 1 << bit; + current->trace_recursion = val; + barrier(); + + return bit + 1; +} + +static __always_inline void trace_clear_recursion(int bit) +{ + unsigned int val = current->trace_recursion; + + if (!bit) + return; + + bit--; + bit = 1 << bit; + val &= ~bit; + + barrier(); + current->trace_recursion = val; +} + +#endif /* CONFIG_TRACING */ +#endif /* _LINUX_TRACE_RECURSION_H */ -- cgit v1.2.3 From 6e4eb9cb22fc8a893cb708ed42644de5ee7c3827 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:37 -0500 Subject: ftrace: Add ftrace_test_recursion_trylock() helper function To make it easier for ftrace callbacks to have recursion protection, provide a ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() helper that tests for recursion. Link: https://lkml.kernel.org/r/20201028115612.634927593@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.378584067@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index dbb7b6d4c94c..f2a949dbfec7 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -183,5 +183,30 @@ static __always_inline void trace_clear_recursion(int bit) current->trace_recursion = val; } +/** + * ftrace_test_recursion_trylock - tests for recursion in same context + * + * Use this for ftrace callbacks. This will detect if the function + * tracing recursed in the same context (normal vs interrupt), + * + * Returns: -1 if a recursion happened. + * >= 0 if no recursion + */ +static __always_inline int ftrace_test_recursion_trylock(void) +{ + return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); +} + +/** + * ftrace_test_recursion_unlock - called when function callback is complete + * @bit: The return of a successful ftrace_test_recursion_trylock() + * + * This is used at the end of a ftrace callback. + */ +static __always_inline void ftrace_test_recursion_unlock(int bit) +{ + trace_clear_recursion(bit); +} + #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_RECURSION_H */ -- cgit v1.2.3 From da5afbeb1724609996ca7bb4fbce2cd104c95914 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:38 -0500 Subject: ftrace: Optimize testing what context current is in The preempt_count() is not a simple location in memory, it could be part of per_cpu code or more. Each access to preempt_count(), or one of its accessor functions (like in_interrupt()) takes several cycles. By reading preempt_count() once, and then doing tests to find the context against the value return is slightly faster than using in_nmi() and in_interrupt(). Link: https://lkml.kernel.org/r/20201028115612.780796355@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.558881845@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index f2a949dbfec7..ac3d73484cb2 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -117,22 +117,29 @@ enum { #define TRACE_CONTEXT_MASK TRACE_LIST_MAX +/* + * Used for setting context + * NMI = 0 + * IRQ = 1 + * SOFTIRQ = 2 + * NORMAL = 3 + */ +enum { + TRACE_CTX_NMI, + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, +}; + static __always_inline int trace_get_context_bit(void) { - int bit; - - if (in_interrupt()) { - if (in_nmi()) - bit = 0; - - else if (in_irq()) - bit = 1; - else - bit = 2; - } else - bit = 3; + unsigned long pc = preempt_count(); - return bit; + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + return TRACE_CTX_NORMAL; + else + return pc & NMI_MASK ? TRACE_CTX_NMI : + pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } static __always_inline int trace_test_and_set_recursion(int start, int max) -- cgit v1.2.3 From a25d036d939a30623ff73ecad9c8b9116b02e823 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:45 -0500 Subject: ftrace: Reverse what the RECURSION flag means in the ftrace_ops Now that all callbacks are recursion safe, reverse the meaning of the RECURSION flag and rename it from RECURSION_SAFE to simply RECURSION. Now only callbacks that request to have recursion protecting it will have the added trampoline to do so. Also remove the outdated comment about "PER_CPU" when determining to use the ftrace_ops_assist_func. Link: https://lkml.kernel.org/r/20201028115613.742454631@goodmis.org Link: https://lkml.kernel.org/r/20201106023547.904270143@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Jonathan Corbet Cc: Sebastian Andrzej Siewior Cc: Miroslav Benes Cc: Kamalesh Babulal Cc: Petr Mladek Cc: linux-doc@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0e4164a7f56d..806196345c3f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -98,7 +98,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. - * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and + * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION, STUB and * IPMODIFY are a kind of attribute flags which can be set only before * registering the ftrace_ops, and can not be modified while registered. * Changing those attribute flags after registering ftrace_ops will @@ -121,10 +121,10 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * passing regs to the handler. * Note, if this flag is set, the SAVE_REGS flag will automatically * get set upon registering the ftrace_ops, if the arch supports it. - * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure - * that the call back has its own recursion protection. If it does - * not set this, then the ftrace infrastructure will add recursion - * protection for the caller. + * RECURSION - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back needs recursion protection. If it does + * not set this, then the ftrace infrastructure will assume + * that the callback can handle recursion on its own. * STUB - The ftrace_ops is just a place holder. * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) @@ -156,7 +156,7 @@ enum { FTRACE_OPS_FL_DYNAMIC = BIT(1), FTRACE_OPS_FL_SAVE_REGS = BIT(2), FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = BIT(3), - FTRACE_OPS_FL_RECURSION_SAFE = BIT(4), + FTRACE_OPS_FL_RECURSION = BIT(4), FTRACE_OPS_FL_STUB = BIT(5), FTRACE_OPS_FL_INITIALIZED = BIT(6), FTRACE_OPS_FL_DELETED = BIT(7), -- cgit v1.2.3 From 773c16705058e9be7b0f4ce124e89cd231c120a2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:46 -0500 Subject: ftrace: Add recording of functions that caused recursion This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file "recursed_functions" all the functions that caused recursion while a callback to the function tracer was running. Link: https://lkml.kernel.org/r/20201106023548.102375687@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Guo Ren Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Joe Lawrence Cc: Kamalesh Babulal Cc: Mauro Carvalho Chehab Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: live-patching@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index ac3d73484cb2..228cc56ed66e 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -91,6 +91,9 @@ enum { * not be correct. Allow for a single recursion to cover this case. */ TRACE_TRANSITION_BIT, + + /* Used to prevent recursion recording from recursing. */ + TRACE_RECORD_RECURSION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -142,7 +145,22 @@ static __always_inline int trace_get_context_bit(void) pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } -static __always_inline int trace_test_and_set_recursion(int start, int max) +#ifdef CONFIG_FTRACE_RECORD_RECURSION +extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); +# define do_ftrace_record_recursion(ip, pip) \ + do { \ + if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \ + trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \ + ftrace_record_recursion(ip, pip); \ + trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \ + } \ + } while (0) +#else +# define do_ftrace_record_recursion(ip, pip) do { } while (0) +#endif + +static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, + int start, int max) { unsigned int val = current->trace_recursion; int bit; @@ -158,8 +176,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) * a switch between contexts. Allow for a single recursion. */ bit = TRACE_TRANSITION_BIT; - if (trace_recursion_test(bit)) + if (trace_recursion_test(bit)) { + do_ftrace_record_recursion(ip, pip); return -1; + } trace_recursion_set(bit); barrier(); return bit + 1; @@ -199,9 +219,10 @@ static __always_inline void trace_clear_recursion(int bit) * Returns: -1 if a recursion happened. * >= 0 if no recursion */ -static __always_inline int ftrace_test_recursion_trylock(void) +static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, + unsigned long parent_ip) { - return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); } /** -- cgit v1.2.3 From 88b8138b240b43d5215bf7cb422692cd8db51f6f Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 6 Nov 2020 14:03:31 +0100 Subject: tty: serial: remove pnx8xxx uart driver Commit 625326ea9c84 ("MIPS: Remove PNX833x alias NXP_STB22x") removed support for PNX833x, so it's time to remove serial driver, too. Signed-off-by: Thomas Bogendoerfer Link: https://lore.kernel.org/r/20201106130332.103476-1-tsbogend@alpha.franken.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_pnx8xxx.h | 67 ------------------------------------------ 1 file changed, 67 deletions(-) delete mode 100644 include/linux/serial_pnx8xxx.h (limited to 'include/linux') diff --git a/include/linux/serial_pnx8xxx.h b/include/linux/serial_pnx8xxx.h deleted file mode 100644 index 619d748dcd44..000000000000 --- a/include/linux/serial_pnx8xxx.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Embedded Alley Solutions, source@embeddedalley.com. - */ - -#ifndef _LINUX_SERIAL_PNX8XXX_H -#define _LINUX_SERIAL_PNX8XXX_H - -#include - -#define PNX8XXX_NR_PORTS 2 - -struct pnx8xxx_port { - struct uart_port port; - struct timer_list timer; - unsigned int old_status; -}; - -/* register offsets */ -#define PNX8XXX_LCR 0 -#define PNX8XXX_MCR 0x004 -#define PNX8XXX_BAUD 0x008 -#define PNX8XXX_CFG 0x00c -#define PNX8XXX_FIFO 0x028 -#define PNX8XXX_ISTAT 0xfe0 -#define PNX8XXX_IEN 0xfe4 -#define PNX8XXX_ICLR 0xfe8 -#define PNX8XXX_ISET 0xfec -#define PNX8XXX_PD 0xff4 -#define PNX8XXX_MID 0xffc - -#define PNX8XXX_UART_LCR_TXBREAK (1<<30) -#define PNX8XXX_UART_LCR_PAREVN 0x10000000 -#define PNX8XXX_UART_LCR_PAREN 0x08000000 -#define PNX8XXX_UART_LCR_2STOPB 0x04000000 -#define PNX8XXX_UART_LCR_8BIT 0x01000000 -#define PNX8XXX_UART_LCR_TX_RST 0x00040000 -#define PNX8XXX_UART_LCR_RX_RST 0x00020000 -#define PNX8XXX_UART_LCR_RX_NEXT 0x00010000 - -#define PNX8XXX_UART_MCR_SCR 0xFF000000 -#define PNX8XXX_UART_MCR_DCD 0x00800000 -#define PNX8XXX_UART_MCR_CTS 0x00100000 -#define PNX8XXX_UART_MCR_LOOP 0x00000010 -#define PNX8XXX_UART_MCR_RTS 0x00000002 -#define PNX8XXX_UART_MCR_DTR 0x00000001 - -#define PNX8XXX_UART_INT_TX 0x00000080 -#define PNX8XXX_UART_INT_EMPTY 0x00000040 -#define PNX8XXX_UART_INT_RCVTO 0x00000020 -#define PNX8XXX_UART_INT_RX 0x00000010 -#define PNX8XXX_UART_INT_RXOVRN 0x00000008 -#define PNX8XXX_UART_INT_FRERR 0x00000004 -#define PNX8XXX_UART_INT_BREAK 0x00000002 -#define PNX8XXX_UART_INT_PARITY 0x00000001 -#define PNX8XXX_UART_INT_ALLRX 0x0000003F -#define PNX8XXX_UART_INT_ALLTX 0x000000C0 - -#define PNX8XXX_UART_FIFO_TXFIFO 0x001F0000 -#define PNX8XXX_UART_FIFO_TXFIFO_STA (0x1f<<16) -#define PNX8XXX_UART_FIFO_RXBRK 0x00008000 -#define PNX8XXX_UART_FIFO_RXFE 0x00004000 -#define PNX8XXX_UART_FIFO_RXPAR 0x00002000 -#define PNX8XXX_UART_FIFO_RXFIFO 0x00001F00 -#define PNX8XXX_UART_FIFO_RBRTHR 0x000000FF - -#endif -- cgit v1.2.3 From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:40 +0000 Subject: bpf: Implement task local storage Similar to bpf_local_storage for sockets and inodes add local storage for task_struct. The life-cycle of storage is managed with the life-cycle of the task_struct. i.e. the storage is destroyed along with the owning task with a callback to the bpf_task_storage_free from the task_free LSM hook. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. The userspace map operations can be done by using a pid fd as a key passed to the lookup, update and delete operations. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 23 +++++++++++++++++++++++ include/linux/bpf_types.h | 1 + 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aaacb6aafc87..73226181b744 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -7,6 +7,7 @@ #ifndef _LINUX_BPF_LSM_H #define _LINUX_BPF_LSM_H +#include #include #include @@ -35,9 +36,21 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + if (unlikely(!task->security)) + return NULL; + + return task->security + bpf_lsm_blob_sizes.lbs_task; +} + extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ @@ -53,10 +66,20 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + return NULL; +} + static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2e6f568377f1..99f7fd657d87 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) -- cgit v1.2.3 From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:43 +0000 Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID The currently available bpf_get_current_task returns an unsigned integer which can be used along with BPF_CORE_READ to read data from the task_struct but still cannot be used as an input argument to a helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct. In order to implement this helper a new return type, RET_PTR_TO_BTF_ID, is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not require checking the nullness of returned pointer. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2fffd30e13ac..73d5381a5d5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ enum bpf_return_type { RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ + RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs -- cgit v1.2.3 From d4d50710a8b46082224376ef119a4dbb75b25c56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Nov 2020 09:27:33 +0100 Subject: seq_file: add seq_read_iter iov_iter based variant for reading a seq_file. seq_read is reimplemented on top of the iter variant. Signed-off-by: Christoph Hellwig Tested-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 813614d4b71f..b83b3ae3c877 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -107,6 +107,7 @@ void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); +ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len); -- cgit v1.2.3 From b819fd9da38508e0504624b87d9983fcc4237f3c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:14 +0100 Subject: highmem: Remove unused functions Nothing uses totalhigh_pages_dec() and totalhigh_pages_set(). Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095856.732891880@linutronix.de --- include/linux/highmem.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 14e6202ce47f..f5c31338f0a3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -104,21 +104,11 @@ static inline void totalhigh_pages_inc(void) atomic_long_inc(&_totalhigh_pages); } -static inline void totalhigh_pages_dec(void) -{ - atomic_long_dec(&_totalhigh_pages); -} - static inline void totalhigh_pages_add(long count) { atomic_long_add(count, &_totalhigh_pages); } -static inline void totalhigh_pages_set(long val) -{ - atomic_long_set(&_totalhigh_pages, val); -} - void kmap_flush_unused(void); struct page *kmap_to_page(void *addr); -- cgit v1.2.3 From 298fa1ad5571f59cb3ca5497a9455f36867f065e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:18 +0100 Subject: highmem: Provide generic variant of kmap_atomic* The kmap_atomic* interfaces in all architectures are pretty much the same except for post map operations (flush) and pre- and post unmap operations. Provide a generic variant for that. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095857.175939340@linutronix.de --- include/linux/highmem.h | 82 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index f5c31338f0a3..f5ecee9c2576 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -31,9 +31,16 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) #include +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +#endif + #ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -extern void kunmap_atomic_high(void *kvaddr); #include #ifndef ARCH_HAS_KMAP_FLUSH_TLB @@ -81,6 +88,11 @@ static inline void kunmap(struct page *page) * be used in IRQ contexts, so in some (very limited) cases we need * it. */ + +#ifndef CONFIG_KMAP_LOCAL +void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); +void kunmap_atomic_high(void *kvaddr); + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); @@ -89,7 +101,38 @@ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) return page_address(page); return kmap_atomic_high_prot(page, prot); } -#define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot) + +static inline void __kunmap_atomic(void *vaddr) +{ + kunmap_atomic_high(vaddr); +} +#else /* !CONFIG_KMAP_LOCAL */ + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); +} + +#endif /* CONFIG_KMAP_LOCAL */ + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); @@ -147,25 +190,33 @@ static inline void *kmap_atomic(struct page *page) pagefault_disable(); return page_address(page); } -#define kmap_atomic_prot(page, prot) kmap_atomic(page) -static inline void kunmap_atomic_high(void *addr) +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) { /* * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() - * handles re-enabling faults + preemption + * handles re-enabling faults and preemption */ #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); #endif } -#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) - #define kmap_flush_unused() do {} while(0) #endif /* CONFIG_HIGHMEM */ +#if !defined(CONFIG_KMAP_LOCAL) #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) DECLARE_PER_CPU(int, __kmap_atomic_idx); @@ -196,22 +247,21 @@ static inline void kmap_atomic_idx_pop(void) __this_cpu_dec(__kmap_atomic_idx); #endif } - +#endif #endif /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() * kunmap_atomic() should get the return value of kmap_atomic, not the page. */ -#define kunmap_atomic(addr) \ -do { \ - BUILD_BUG_ON(__same_type((addr), struct page *)); \ - kunmap_atomic_high(addr); \ - pagefault_enable(); \ - preempt_enable(); \ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr); \ + pagefault_enable(); \ + preempt_enable(); \ } while (0) - /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) -- cgit v1.2.3 From 157e118b55113d1e6c7f8ddfcec0a1dbf3a69511 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:20 +0100 Subject: x86/mm/highmem: Use generic kmap atomic implementation Convert X86 to the generic kmap atomic implementation and make the iomap_atomic() naming convention consistent while at it. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201103095857.375127260@linutronix.de --- include/linux/highmem.h | 2 +- include/linux/io-mapping.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index f5ecee9c2576..1222a31be842 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -217,7 +217,7 @@ static inline void __kunmap_atomic(void *addr) #endif /* CONFIG_HIGHMEM */ #if !defined(CONFIG_KMAP_LOCAL) -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +#if defined(CONFIG_HIGHMEM) DECLARE_PER_CPU(int, __kmap_atomic_idx); diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index c75e4d3d8833..3b0940be72e9 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -69,7 +69,7 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; - return iomap_atomic_prot_pfn(PHYS_PFN(phys_addr), mapping->prot); + return iomap_atomic_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); } static inline void -- cgit v1.2.3 From d7029e4549691ecaf1ead536d3322a00bda85659 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:30 +0100 Subject: highmem: Get rid of kmap_types.h The header is not longer used and on alpha, ia64, openrisc, parisc and um it was completely unused anyway as these architectures have no highmem support. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: Andrew Morton Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20201103095858.422094352@linutronix.de --- include/linux/highmem.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 1222a31be842..de78869454b1 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -29,8 +29,6 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) } #endif -#include - /* * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. */ -- cgit v1.2.3 From 3c1016b53c311906878c703af1e2b29855a9a962 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:31 +0100 Subject: mm/highmem: Remove the old kmap_atomic cruft All users gone. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20201103095858.516281567@linutronix.de --- include/linux/highmem.h | 63 ++++--------------------------------------------- 1 file changed, 4 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index de78869454b1..3180a8f7307f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -86,31 +86,16 @@ static inline void kunmap(struct page *page) * be used in IRQ contexts, so in some (very limited) cases we need * it. */ - -#ifndef CONFIG_KMAP_LOCAL -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -void kunmap_atomic_high(void *kvaddr); - static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_atomic_high_prot(page, prot); -} - -static inline void __kunmap_atomic(void *vaddr) -{ - kunmap_atomic_high(vaddr); + return __kmap_local_page_prot(page, prot); } -#else /* !CONFIG_KMAP_LOCAL */ -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic(struct page *page) { - preempt_disable(); - pagefault_disable(); - return __kmap_local_page_prot(page, prot); + return kmap_atomic_prot(page, kmap_prot); } static inline void *kmap_atomic_pfn(unsigned long pfn) @@ -125,13 +110,6 @@ static inline void __kunmap_atomic(void *addr) kunmap_local_indexed(addr); } -#endif /* CONFIG_KMAP_LOCAL */ - -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} - /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); extern atomic_long_t _totalhigh_pages; @@ -212,41 +190,8 @@ static inline void __kunmap_atomic(void *addr) #define kmap_flush_unused() do {} while(0) -#endif /* CONFIG_HIGHMEM */ - -#if !defined(CONFIG_KMAP_LOCAL) -#if defined(CONFIG_HIGHMEM) - -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx >= KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx(void) -{ - return __this_cpu_read(__kmap_atomic_idx) - 1; -} -static inline void kmap_atomic_idx_pop(void) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - int idx = __this_cpu_dec_return(__kmap_atomic_idx); - - BUG_ON(idx < 0); -#else - __this_cpu_dec(__kmap_atomic_idx); -#endif -} -#endif -#endif +#endif /* CONFIG_HIGHMEM */ /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() -- cgit v1.2.3 From 351191ad55c8a1eccaf23e4187c62056229c0779 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:32 +0100 Subject: io-mapping: Cleanup atomic iomap Switch the atomic iomap implementation over to kmap_local and stick the preempt/pagefault mechanics into the generic code similar to the kmap_atomic variants. Rename the x86 map function in preparation for a non-atomic variant. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095858.625310005@linutronix.de --- include/linux/io-mapping.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 3b0940be72e9..60e7c83e4904 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -69,13 +69,17 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; - return iomap_atomic_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); + preempt_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); } static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { - iounmap_atomic(vaddr); + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); + preempt_enable(); } static inline void __iomem * -- cgit v1.2.3 From 13f876ba77ebd5125799bb042201f22cf73df154 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:34 +0100 Subject: highmem: High implementation details and document API Move the gory details of kmap & al into a private header and only document the interfaces which are usable by drivers. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095858.827582066@linutronix.de --- include/linux/highmem-internal.h | 174 +++++++++++++++++++++++++ include/linux/highmem.h | 266 ++++++++++++++------------------------- 2 files changed, 270 insertions(+), 170 deletions(-) create mode 100644 include/linux/highmem-internal.h (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h new file mode 100644 index 000000000000..6ceed907b14e --- /dev/null +++ b/include/linux/highmem-internal.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HIGHMEM_INTERNAL_H +#define _LINUX_HIGHMEM_INTERNAL_H + +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +#endif + +#ifdef CONFIG_HIGHMEM +#include + +#ifndef ARCH_HAS_KMAP_FLUSH_TLB +static inline void kmap_flush_tlb(unsigned long addr) { } +#endif + +#ifndef kmap_prot +#define kmap_prot PAGE_KERNEL +#endif + +void *kmap_high(struct page *page); +void kunmap_high(struct page *page); +void __kmap_flush_unused(void); +struct page *__kmap_to_page(void *addr); + +static inline void *kmap(struct page *page) +{ + void *addr; + + might_sleep(); + if (!PageHighMem(page)) + addr = page_address(page); + else + addr = kmap_high(page); + kmap_flush_tlb((unsigned long)addr); + return addr; +} + +static inline void kunmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + +static inline struct page *kmap_to_page(void *addr) +{ + return __kmap_to_page(addr); +} + +static inline void kmap_flush_unused(void) +{ + __kmap_flush_unused(); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); + pagefault_enable(); + preempt_enable(); +} + +unsigned int __nr_free_highpages(void); +extern atomic_long_t _totalhigh_pages; + +static inline unsigned int nr_free_highpages(void) +{ + return __nr_free_highpages(); +} + +static inline unsigned long totalhigh_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalhigh_pages); +} + +static inline void totalhigh_pages_inc(void) +{ + atomic_long_inc(&_totalhigh_pages); +} + +static inline void totalhigh_pages_add(long count) +{ + atomic_long_add(count, &_totalhigh_pages); +} + +#else /* CONFIG_HIGHMEM */ + +static inline struct page *kmap_to_page(void *addr) +{ + return virt_to_page(addr); +} + +static inline void *kmap(struct page *page) +{ + might_sleep(); + return page_address(page); +} + +static inline void kunmap_high(struct page *page) { } +static inline void kmap_flush_unused(void) { } + +static inline void kunmap(struct page *page) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(page_address(page)); +#endif +} + +static inline void *kmap_atomic(struct page *page) +{ + preempt_disable(); + pagefault_disable(); + return page_address(page); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif + pagefault_enable(); + preempt_enable(); +} + +static inline unsigned int nr_free_highpages(void) { return 0; } +static inline unsigned long totalhigh_pages(void) { return 0UL; } + +#endif /* CONFIG_HIGHMEM */ + +/* + * Prevent people trying to call kunmap_atomic() as if it were kunmap() + * kunmap_atomic() should get the return value of kmap_atomic, not the page. + */ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr); \ +} while (0) + +#endif diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3180a8f7307f..7d098bd621f6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -11,199 +11,125 @@ #include -#ifndef ARCH_HAS_FLUSH_ANON_PAGE -static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) -{ -} -#endif - -#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ -} -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ -} -#endif +#include "highmem-internal.h" -/* - * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. +/** + * kmap - Map a page for long term usage + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Can only be invoked from preemptible task context because on 32bit + * systems with CONFIG_HIGHMEM enabled this function might sleep. + * + * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area + * this returns the virtual address of the direct kernel mapping. + * + * The returned virtual address is globally visible and valid up to the + * point where it is unmapped via kunmap(). The pointer can be handed to + * other contexts. + * + * For highmem pages on 32bit systems this can be slow as the mapping space + * is limited and protected by a global lock. In case that there is no + * mapping slot available the function blocks until a slot is released via + * kunmap(). */ -#ifdef CONFIG_KMAP_LOCAL -void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void *__kmap_local_page_prot(struct page *page, pgprot_t prot); -void kunmap_local_indexed(void *vaddr); -#endif - -#ifdef CONFIG_HIGHMEM -#include +static inline void *kmap(struct page *page); -#ifndef ARCH_HAS_KMAP_FLUSH_TLB -static inline void kmap_flush_tlb(unsigned long addr) { } -#endif - -#ifndef kmap_prot -#define kmap_prot PAGE_KERNEL -#endif - -void *kmap_high(struct page *page); -static inline void *kmap(struct page *page) -{ - void *addr; - - might_sleep(); - if (!PageHighMem(page)) - addr = page_address(page); - else - addr = kmap_high(page); - kmap_flush_tlb((unsigned long)addr); - return addr; -} +/** + * kunmap - Unmap the virtual address mapped by kmap() + * @addr: Virtual address to be unmapped + * + * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of + * pages in the low memory area. + */ +static inline void kunmap(struct page *page); -void kunmap_high(struct page *page); +/** + * kmap_to_page - Get the page for a kmap'ed address + * @addr: The address to look up + * + * Returns: The page which is mapped to @addr. + */ +static inline struct page *kmap_to_page(void *addr); -static inline void kunmap(struct page *page) -{ - might_sleep(); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} +/** + * kmap_flush_unused - Flush all unused kmap mappings in order to + * remove stray mappings + */ +static inline void kmap_flush_unused(void); -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because - * no global lock is needed and because the kmap code must perform a global TLB - * invalidation when the kmap pool wraps. +/** + * kmap_atomic - Atomically map a page for temporary usage + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Side effect: On return pagefaults and preemption are disabled. + * + * Can be invoked from any context. * - * However when holding an atomic kmap it is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation: * - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. + * addr1 = kmap_atomic(page1); + * addr2 = kmap_atomic(page2); + * ... + * kunmap_atomic(addr2); + * kunmap_atomic(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() it comes with restrictions + * about the pointer validity and the side effects of disabling page faults + * and preemption. Use it only when absolutely necessary, e.g. from non + * preemptible contexts. */ -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) -{ - preempt_disable(); - pagefault_disable(); - return __kmap_local_page_prot(page, prot); -} +static inline void *kmap_atomic(struct page *page); -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} - -static inline void *kmap_atomic_pfn(unsigned long pfn) -{ - preempt_disable(); - pagefault_disable(); - return __kmap_local_pfn_prot(pfn, kmap_prot); -} - -static inline void __kunmap_atomic(void *addr) -{ - kunmap_local_indexed(addr); -} - -/* declarations for linux/mm/highmem.c */ -unsigned int nr_free_highpages(void); -extern atomic_long_t _totalhigh_pages; -static inline unsigned long totalhigh_pages(void) -{ - return (unsigned long)atomic_long_read(&_totalhigh_pages); -} - -static inline void totalhigh_pages_inc(void) -{ - atomic_long_inc(&_totalhigh_pages); -} - -static inline void totalhigh_pages_add(long count) -{ - atomic_long_add(count, &_totalhigh_pages); -} - -void kmap_flush_unused(void); - -struct page *kmap_to_page(void *addr); - -#else /* CONFIG_HIGHMEM */ - -static inline unsigned int nr_free_highpages(void) { return 0; } - -static inline struct page *kmap_to_page(void *addr) -{ - return virt_to_page(addr); -} - -static inline unsigned long totalhigh_pages(void) { return 0UL; } +/** + * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic() + * @addr: Virtual address to be unmapped + * + * Counterpart to kmap_atomic(). + * + * Undoes the side effects of kmap_atomic(), i.e. reenabling pagefaults and + * preemption. + * + * Other than that a NOOP for CONFIG_HIGHMEM=n and for mappings of pages + * in the low memory area. For real highmen pages the mapping which was + * established with kmap_atomic() is destroyed. + */ -static inline void *kmap(struct page *page) -{ - might_sleep(); - return page_address(page); -} +/* Highmem related interfaces for management code */ +static inline unsigned int nr_free_highpages(void); +static inline unsigned long totalhigh_pages(void); -static inline void kunmap_high(struct page *page) +#ifndef ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } - -static inline void kunmap(struct page *page) -{ -#ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(page_address(page)); #endif -} -static inline void *kmap_atomic(struct page *page) +#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +static inline void flush_kernel_dcache_page(struct page *page) { - preempt_disable(); - pagefault_disable(); - return page_address(page); } - -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void flush_kernel_vmap_range(void *vaddr, int size) { - return kmap_atomic(page); } - -static inline void *kmap_atomic_pfn(unsigned long pfn) +static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { - return kmap_atomic(pfn_to_page(pfn)); } - -static inline void __kunmap_atomic(void *addr) -{ - /* - * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() - * handles re-enabling faults and preemption - */ -#ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); #endif -} - -#define kmap_flush_unused() do {} while(0) - - -#endif /* CONFIG_HIGHMEM */ - -/* - * Prevent people trying to call kunmap_atomic() as if it were kunmap() - * kunmap_atomic() should get the return value of kmap_atomic, not the page. - */ -#define kunmap_atomic(__addr) \ -do { \ - BUILD_BUG_ON(__same_type((__addr), struct page *)); \ - __kunmap_atomic(__addr); \ - pagefault_enable(); \ - preempt_enable(); \ -} while (0) /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage -- cgit v1.2.3 From 3fcd6a230fa7d03bffcb831a81b40435c146c12b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 3 Sep 2020 15:23:29 -0700 Subject: x86/cpu: Avoid cpuinfo-induced IPIing of idle CPUs Currently, accessing /proc/cpuinfo sends IPIs to idle CPUs in order to learn their clock frequency. Which is a bit strange, given that waking them from idle likely significantly changes their clock frequency. This commit therefore avoids sending /proc/cpuinfo-induced IPIs to idle CPUs. [ paulmck: Also check for idle in arch_freq_prepare_all(). ] Signed-off-by: Paul E. McKenney Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: --- include/linux/rcutiny.h | 2 ++ include/linux/rcutree.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7c1ecdb356d8..2a97334eb786 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -89,6 +89,8 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_preempt(void) { } static inline void rcu_irq_exit_check_preempt(void) { } +#define rcu_is_idle_cpu(cpu) \ + (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 59eb5cd567d7..df578b73960f 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -50,6 +50,7 @@ void rcu_irq_exit(void); void rcu_irq_exit_preempt(void); void rcu_irq_enter_irqson(void); void rcu_irq_exit_irqson(void); +bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); -- cgit v1.2.3 From ede7dc7fa0af619afc08995776eadb9ff3b0a711 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:54 -0800 Subject: jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs The on-disk superblock field sb->s_maxlen represents the total size of the journal including the fast commit area and is no more the max number of blocks available for a transaction. The maximum number of blocks available to a transaction is reduced by the number of fast commit blocks. So, this patch renames j_maxlen to j_total_len to better represent its intent. Also, it adds a function to calculate max number of bufs available for a transaction. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1d5566af48ac..e0b6b53eae64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -988,9 +988,9 @@ struct journal_s struct block_device *j_fs_dev; /** - * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_total_len: Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; + unsigned int j_total_len; /** * @j_reserved_credits: @@ -1624,6 +1624,11 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); +static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 4; +} + /* * is_journal_abort * -- cgit v1.2.3 From a1e5e465b31d6015fccb359d99053b39e5180466 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:55 -0800 Subject: ext4: clean up the JBD2 API that initializes fast commits This patch removes jbd2_fc_init() API and its related functions to simplify enabling fast commits. With this change, the number of fast commit blocks to use is solely determined by the JBD2 layer. So, we move the default value for minimum number of fast commit blocks from ext4/fast_commit.h to include/linux/jbd2.h. However, whether or not to use fast commits is determined by the file system. The file system just sets the fast commit feature using jbd2_journal_set_features(). JBD2 layer then determines how many blocks to use for fast commits (based on the value found in the JBD2 superblock). Note that the JBD2 feature flag of fast commits is just an indication that there are fast commit blocks present on disk. It doesn't tell JBD2 layer about the intent of the file system of whether to it wants to use fast commit or not. That's why, we blindly clear the fast commit flag in journal_reset() after the recovery is done. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e0b6b53eae64..b2caf7bbd8e5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_MIN_FC_BLOCKS 256 #ifdef __KERNEL__ @@ -1614,7 +1615,6 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ -int jbd2_fc_init(journal_t *journal, int num_fc_blks); int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); -- cgit v1.2.3 From c460e5edc85a063ec9cb60addff93d00ed378701 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:57 -0800 Subject: jbd2: don't use state lock during commit path Variables journal->j_fc_off, journal->j_fc_wbuf are accessed during commit path. Since today we allow only one process to perform a fast commit, there is no need take state lock before accessing these variables. This patch removes these locks and adds comments to describe this. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-9-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b2caf7bbd8e5..5f0ef6380b0c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -945,8 +945,9 @@ struct journal_s /** * @j_fc_off: * - * Number of fast commit blocks currently allocated. - * [j_state_lock]. + * Number of fast commit blocks currently allocated. Accessed only + * during fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ unsigned long j_fc_off; @@ -1109,8 +1110,9 @@ struct journal_s struct buffer_head **j_wbuf; /** - * @j_fc_wbuf: Array of fast commit bhs for - * jbd2_journal_commit_transaction. + * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only + * during a fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; -- cgit v1.2.3 From 0bce577bf9cae13ae32d391432d0030e3f67fc1d Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:58 -0800 Subject: jbd2: don't pass tid to jbd2_fc_end_commit_fallback() In jbd2_fc_end_commit_fallback(), we know which tid to commit. There's no need for caller to pass it. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-10-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5f0ef6380b0c..1c49fd62ff2e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1619,7 +1619,7 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); -- cgit v1.2.3 From eda2845ae5e0ae466c1aca715d642b4977311747 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:15 +0200 Subject: irqdomain: Remove unused of_device_id forward declaration There is no users of of_device_id in irqdomain.h. Drop it. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-2-andriy.shevchenko@linux.intel.com --- include/linux/irqdomain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 71535e87109f..56642188ec21 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -38,7 +38,6 @@ struct device_node; struct irq_domain; -struct of_device_id; struct irq_chip; struct irq_data; struct cpumask; -- cgit v1.2.3 From 08219fb1efae451c83281cb6ba4bb6c35ac88fab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:16 +0200 Subject: irqdomain: Add forward declaration of fwnode_handle irqdomain.h is a user of struct fwnode_handle. Add forward declaration of it. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-3-andriy.shevchenko@linux.intel.com --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 56642188ec21..d21f75d294d7 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -37,6 +37,7 @@ #include struct device_node; +struct fwnode_handle; struct irq_domain; struct irq_chip; struct irq_data; -- cgit v1.2.3 From b6e95788fde8c9bc9da729102085dd36a5a0cda6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:18 +0200 Subject: irqdomain: Introduce irq_domain_create_legacy() API Introduce irq_domain_create_legacy() API which is functional equivalent to the existing irq_domain_add_legacy(), but takes a pointer to the struct fwnode_handle as a parameter. This is useful for non OF systems. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-5-andriy.shevchenko@linux.intel.com --- include/linux/irqdomain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d21f75d294d7..77bf7d84c673 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -271,6 +271,12 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern bool irq_domain_check_msi_remap(void); -- cgit v1.2.3 From 1d4ef9b39ebecca827642b8897d2d79ea2026682 Mon Sep 17 00:00:00 2001 From: Cristian Pop Date: Mon, 28 Sep 2020 12:09:55 +0300 Subject: iio: core: Add optional symbolic label to a device channel If a label is defined in the device tree for this channel add that to the channel specific attributes. This is useful for userspace to be able to identify an individual channel. Signed-off-by: Cristian Pop Link: https://lore.kernel.org/r/20200928090959.88842-1-cristian.pop@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2e45b3ceafa7..9a3cf4815148 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -362,6 +362,8 @@ struct iio_trigger; /* forward declaration */ * and max. For lists, all possible values are enumerated. * @write_raw: function to write a value to the device. * Parameters are the same as for read_raw. + * @read_label: function to request label name for a specified label, + * for better channel identification. * @write_raw_get_fmt: callback function to query the expected * format/precision. If not set by the driver, write_raw * returns IIO_VAL_INT_PLUS_MICRO. @@ -420,6 +422,10 @@ struct iio_info { int val2, long mask); + int (*read_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + char *label); + int (*write_raw_get_fmt)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, long mask); -- cgit v1.2.3 From 0e30f47232ab57c685258aa91adc3a3e67bd023e Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 5 Oct 2020 21:01:26 +0530 Subject: mtd: spi-nor: add support for DTR protocol Double Transfer Rate (DTR) is SPI protocol in which data is transferred on each clock edge as opposed to on each clock cycle. Make framework-level changes to allow supporting flashes in DTR mode. Right now, mixed DTR modes are not supported. So, for example a mode like 4S-4D-4D will not work. All phases need to be either DTR or STR. The xSPI spec says that "The program commands provide SPI backward compatible commands for programming data...". So 8D-8D-8D page program opcodes are populated with using 1S-1S-1S opcodes. Signed-off-by: Pratyush Yadav Signed-off-by: Vignesh Raghavendra Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201005153138.6437-4-p.yadav@ti.com --- include/linux/mtd/spi-nor.h | 51 +++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 60bac2c0ec45..cd549042c53d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -182,6 +182,7 @@ enum spi_nor_protocol { SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2), SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4), SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8), + SNOR_PROTO_8_8_8_DTR = SNOR_PROTO_DTR(8, 8, 8), }; static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto) @@ -228,7 +229,7 @@ struct spi_nor_hwcaps { * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly * (Slow) Read. */ -#define SNOR_HWCAPS_READ_MASK GENMASK(14, 0) +#define SNOR_HWCAPS_READ_MASK GENMASK(15, 0) #define SNOR_HWCAPS_READ BIT(0) #define SNOR_HWCAPS_READ_FAST BIT(1) #define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2) @@ -245,11 +246,12 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_READ_4_4_4 BIT(9) #define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) -#define SNOR_HWCAPS_READ_OCTAL GENMASK(14, 11) +#define SNOR_HWCAPS_READ_OCTAL GENMASK(15, 11) #define SNOR_HWCAPS_READ_1_1_8 BIT(11) #define SNOR_HWCAPS_READ_1_8_8 BIT(12) #define SNOR_HWCAPS_READ_8_8_8 BIT(13) #define SNOR_HWCAPS_READ_1_8_8_DTR BIT(14) +#define SNOR_HWCAPS_READ_8_8_8_DTR BIT(15) /* * Page Program capabilities. @@ -260,18 +262,19 @@ struct spi_nor_hwcaps { * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory * implements such commands. */ -#define SNOR_HWCAPS_PP_MASK GENMASK(22, 16) -#define SNOR_HWCAPS_PP BIT(16) +#define SNOR_HWCAPS_PP_MASK GENMASK(23, 16) +#define SNOR_HWCAPS_PP BIT(16) -#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) -#define SNOR_HWCAPS_PP_1_1_4 BIT(17) -#define SNOR_HWCAPS_PP_1_4_4 BIT(18) -#define SNOR_HWCAPS_PP_4_4_4 BIT(19) +#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) +#define SNOR_HWCAPS_PP_1_1_4 BIT(17) +#define SNOR_HWCAPS_PP_1_4_4 BIT(18) +#define SNOR_HWCAPS_PP_4_4_4 BIT(19) -#define SNOR_HWCAPS_PP_OCTAL GENMASK(22, 20) -#define SNOR_HWCAPS_PP_1_1_8 BIT(20) -#define SNOR_HWCAPS_PP_1_8_8 BIT(21) -#define SNOR_HWCAPS_PP_8_8_8 BIT(22) +#define SNOR_HWCAPS_PP_OCTAL GENMASK(23, 20) +#define SNOR_HWCAPS_PP_1_1_8 BIT(20) +#define SNOR_HWCAPS_PP_1_8_8 BIT(21) +#define SNOR_HWCAPS_PP_8_8_8 BIT(22) +#define SNOR_HWCAPS_PP_8_8_8_DTR BIT(23) #define SNOR_HWCAPS_X_X_X (SNOR_HWCAPS_READ_2_2_2 | \ SNOR_HWCAPS_READ_4_4_4 | \ @@ -279,10 +282,14 @@ struct spi_nor_hwcaps { SNOR_HWCAPS_PP_4_4_4 | \ SNOR_HWCAPS_PP_8_8_8) +#define SNOR_HWCAPS_X_X_X_DTR (SNOR_HWCAPS_READ_8_8_8_DTR | \ + SNOR_HWCAPS_PP_8_8_8_DTR) + #define SNOR_HWCAPS_DTR (SNOR_HWCAPS_READ_1_1_1_DTR | \ SNOR_HWCAPS_READ_1_2_2_DTR | \ SNOR_HWCAPS_READ_1_4_4_DTR | \ - SNOR_HWCAPS_READ_1_8_8_DTR) + SNOR_HWCAPS_READ_1_8_8_DTR | \ + SNOR_HWCAPS_READ_8_8_8_DTR) #define SNOR_HWCAPS_ALL (SNOR_HWCAPS_READ_MASK | \ SNOR_HWCAPS_PP_MASK) @@ -318,6 +325,22 @@ struct spi_nor_controller_ops { int (*erase)(struct spi_nor *nor, loff_t offs); }; +/** + * enum spi_nor_cmd_ext - describes the command opcode extension in DTR mode + * @SPI_NOR_EXT_NONE: no extension. This is the default, and is used in Legacy + * SPI mode + * @SPI_NOR_EXT_REPEAT: the extension is same as the opcode + * @SPI_NOR_EXT_INVERT: the extension is the bitwise inverse of the opcode + * @SPI_NOR_EXT_HEX: the extension is any hex value. The command and opcode + * combine to form a 16-bit opcode. + */ +enum spi_nor_cmd_ext { + SPI_NOR_EXT_NONE = 0, + SPI_NOR_EXT_REPEAT, + SPI_NOR_EXT_INVERT, + SPI_NOR_EXT_HEX, +}; + /* * Forward declarations that are used internally by the core and manufacturer * drivers. @@ -345,6 +368,7 @@ struct spi_nor_flash_parameter; * @program_opcode: the program opcode * @sst_write_second: used by the SST write operation * @flags: flag options for the current SPI NOR (SNOR_F_*) + * @cmd_ext_type: the command opcode extension type for DTR mode. * @read_proto: the SPI protocol for read operations * @write_proto: the SPI protocol for write operations * @reg_proto: the SPI protocol for read_reg/write_reg/erase operations @@ -376,6 +400,7 @@ struct spi_nor { enum spi_nor_protocol reg_proto; bool sst_write_second; u32 flags; + enum spi_nor_cmd_ext cmd_ext_type; const struct spi_nor_controller_ops *controller_ops; -- cgit v1.2.3 From d73ee7534cc537b98b61bfd83dbce5bb12aecb80 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 5 Oct 2020 21:01:35 +0530 Subject: mtd: spi-nor: core: perform a Soft Reset on shutdown Perform a Soft Reset on shutdown on flashes that support it so that the flash can be reset to its initial state and any configurations made by spi-nor (given that they're only done in volatile registers) will be reset. This will hand back the flash in pristine state for any further operations on it. Signed-off-by: Pratyush Yadav Signed-off-by: Vignesh Raghavendra Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201005153138.6437-13-p.yadav@ti.com --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index cd549042c53d..299685d15dc2 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -51,6 +51,8 @@ #define SPINOR_OP_CLFSR 0x50 /* Clear flag status register */ #define SPINOR_OP_RDEAR 0xc8 /* Read Extended Address Register */ #define SPINOR_OP_WREAR 0xc5 /* Write Extended Address Register */ +#define SPINOR_OP_SRSTEN 0x66 /* Software Reset Enable */ +#define SPINOR_OP_SRST 0x99 /* Software Reset */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ_4B 0x13 /* Read data bytes (low frequency) */ -- cgit v1.2.3 From 43676605f890b218e551f396a55dbaea7799acb4 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 3 Nov 2020 10:30:10 +0100 Subject: drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new functions ttm_bo_{vmap,vunmap}() map and unmap a TTM BO in kernel address space. The mapping's address is returned as struct dma_buf_map. Each function is a simplified version of TTM's existing kmap code. Both functions respect the memory's location ani/or writecombine flags. On top TTM's functions, GEM TTM helpers got drm_gem_ttm_{vmap,vunmap}(), two helpers that convert a GEM object into the TTM BO and forward the call to TTM's vmap/vunmap. These helpers can be dropped into the rsp GEM object callbacks. v5: * use size_t for storing mapping size (Christian) * ignore premapped memory areas correctly in ttm_bo_vunmap() * rebase onto latest TTM interfaces (Christian) * remove BUG() from ttm_bo_vmap() (Christian) v4: * drop ttm_kmap_obj_to_dma_buf() in favor of vmap helpers (Daniel, Christian) Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Daniel Vetter Tested-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201103093015.1063-6-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index fd1aba545fdf..2e8bbecb5091 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -45,6 +45,12 @@ * * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); * + * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem(). + * + * .. code-block:: c + * + * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); + * * Test if a mapping is valid with either dma_buf_map_is_set() or * dma_buf_map_is_null(). * @@ -118,6 +124,20 @@ static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) map->is_iomem = false; } +/** + * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory + * @map: The dma-buf mapping structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + /** * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality * @lhs: The dma-buf mapping structure -- cgit v1.2.3 From b4e7090c242ec64bfa77a69ea7c3cceba21c7a65 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 3 Nov 2020 10:30:14 +0100 Subject: dma-buf-map: Add memcpy and pointer-increment interfaces To do framebuffer updates, one needs memcpy from system memory and a pointer-increment function. Add both interfaces with documentation. v5: * include to build on sparc64 (Sam) Signed-off-by: Thomas Zimmermann Reviewed-by: Sam Ravnborg Tested-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201103093015.1063-10-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 73 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 2e8bbecb5091..583a3a1f9447 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -7,6 +7,7 @@ #define __DMA_BUF_MAP_H__ #include +#include /** * DOC: overview @@ -32,6 +33,14 @@ * accessing the buffer. Use the returned instance and the helper functions * to access the buffer's memory in the correct way. * + * The type :c:type:`struct dma_buf_map ` and its helpers are + * actually independent from the dma-buf infrastructure. When sharing buffers + * among devices, drivers have to know the location of the memory to access + * the buffers in a safe way. :c:type:`struct dma_buf_map ` + * solves this problem for dma-buf and its users. If other drivers or + * sub-systems require similar functionality, the type could be generalized + * and moved to a more prominent header file. + * * Open-coding access to :c:type:`struct dma_buf_map ` is * considered bad style. Rather then accessing its fields directly, use one * of the provided helper functions, or implement your own. For example, @@ -51,6 +60,14 @@ * * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); * + * Instances of struct dma_buf_map do not have to be cleaned up, but + * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings + * always refer to system memory. + * + * .. code-block:: c + * + * dma_buf_map_clear(&map); + * * Test if a mapping is valid with either dma_buf_map_is_set() or * dma_buf_map_is_null(). * @@ -73,17 +90,19 @@ * if (dma_buf_map_is_equal(&sys_map, &io_map)) * // always false * - * Instances of struct dma_buf_map do not have to be cleaned up, but - * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings - * always refer to system memory. + * A set up instance of struct dma_buf_map can be used to access or manipulate + * the buffer memory. Depending on the location of the memory, the provided + * helpers will pick the correct operations. Data can be copied into the memory + * with dma_buf_map_memcpy_to(). The address can be manipulated with + * dma_buf_map_incr(). * - * The type :c:type:`struct dma_buf_map ` and its helpers are - * actually independent from the dma-buf infrastructure. When sharing buffers - * among devices, drivers have to know the location of the memory to access - * the buffers in a safe way. :c:type:`struct dma_buf_map ` - * solves this problem for dma-buf and its users. If other drivers or - * sub-systems require similar functionality, the type could be generalized - * and moved to a more prominent header file. + * .. code-block:: c + * + * const void *src = ...; // source buffer + * size_t len = ...; // length of src + * + * dma_buf_map_memcpy_to(&map, src, len); + * dma_buf_map_incr(&map, len); // go to first byte after the memcpy */ /** @@ -210,4 +229,38 @@ static inline void dma_buf_map_clear(struct dma_buf_map *map) } } +/** + * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping + * @dst: The dma-buf mapping structure + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a dma-buf mapping. The source buffer is in system + * memory. Depending on the buffer's location, the helper picks the correct + * method of accessing the memory. + */ +static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem, src, len); + else + memcpy(dst->vaddr, src, len); +} + +/** + * dma_buf_map_incr - Increments the address stored in a dma-buf mapping + * @map: The dma-buf mapping structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a dma-buf mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + #endif /* __DMA_BUF_MAP_H__ */ -- cgit v1.2.3 From 22447a99c97e353bde8f90c2353873f27681d57c Mon Sep 17 00:00:00 2001 From: Pawel Czarnecki Date: Tue, 13 Oct 2020 16:45:52 +0200 Subject: drivers/soc/litex: add LiteX SoC Controller driver This commit adds driver for the FPGA-based LiteX SoC Controller from LiteX SoC builder. Co-developed-by: Mateusz Holenko Signed-off-by: Mateusz Holenko Signed-off-by: Pawel Czarnecki Signed-off-by: Stafford Horne --- include/linux/litex.h | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 include/linux/litex.h (limited to 'include/linux') diff --git a/include/linux/litex.h b/include/linux/litex.h new file mode 100644 index 000000000000..40f5be503593 --- /dev/null +++ b/include/linux/litex.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common LiteX header providing + * helper functions for accessing CSRs. + * + * Implementation of the functions is provided by + * the LiteX SoC Controller driver. + * + * Copyright (C) 2019-2020 Antmicro + */ + +#ifndef _LINUX_LITEX_H +#define _LINUX_LITEX_H + +#include +#include +#include + +/* + * The parameters below are true for LiteX SoCs configured for 8-bit CSR Bus, + * 32-bit aligned. + * + * Supporting other configurations will require extending the logic in this + * header and in the LiteX SoC controller driver. + */ +#define LITEX_REG_SIZE 0x4 +#define LITEX_SUBREG_SIZE 0x1 +#define LITEX_SUBREG_SIZE_BIT (LITEX_SUBREG_SIZE * 8) + +#define WRITE_LITEX_SUBREGISTER(val, base_offset, subreg_id) \ + writel((u32 __force)cpu_to_le32(val), base_offset + (LITEX_REG_SIZE * subreg_id)) + +#define READ_LITEX_SUBREGISTER(base_offset, subreg_id) \ + le32_to_cpu((__le32 __force)readl(base_offset + (LITEX_REG_SIZE * subreg_id))) + +void litex_set_reg(void __iomem *reg, unsigned long reg_sz, unsigned long val); + +unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_sz); + +static inline void litex_write8(void __iomem *reg, u8 val) +{ + WRITE_LITEX_SUBREGISTER(val, reg, 0); +} + +static inline void litex_write16(void __iomem *reg, u16 val) +{ + WRITE_LITEX_SUBREGISTER(val >> 8, reg, 0); + WRITE_LITEX_SUBREGISTER(val, reg, 1); +} + +static inline void litex_write32(void __iomem *reg, u32 val) +{ + WRITE_LITEX_SUBREGISTER(val >> 24, reg, 0); + WRITE_LITEX_SUBREGISTER(val >> 16, reg, 1); + WRITE_LITEX_SUBREGISTER(val >> 8, reg, 2); + WRITE_LITEX_SUBREGISTER(val, reg, 3); +} + +static inline void litex_write64(void __iomem *reg, u64 val) +{ + WRITE_LITEX_SUBREGISTER(val >> 56, reg, 0); + WRITE_LITEX_SUBREGISTER(val >> 48, reg, 1); + WRITE_LITEX_SUBREGISTER(val >> 40, reg, 2); + WRITE_LITEX_SUBREGISTER(val >> 32, reg, 3); + WRITE_LITEX_SUBREGISTER(val >> 24, reg, 4); + WRITE_LITEX_SUBREGISTER(val >> 16, reg, 5); + WRITE_LITEX_SUBREGISTER(val >> 8, reg, 6); + WRITE_LITEX_SUBREGISTER(val, reg, 7); +} + +static inline u8 litex_read8(void __iomem *reg) +{ + return READ_LITEX_SUBREGISTER(reg, 0); +} + +static inline u16 litex_read16(void __iomem *reg) +{ + return (READ_LITEX_SUBREGISTER(reg, 0) << 8) + | (READ_LITEX_SUBREGISTER(reg, 1)); +} + +static inline u32 litex_read32(void __iomem *reg) +{ + return (READ_LITEX_SUBREGISTER(reg, 0) << 24) + | (READ_LITEX_SUBREGISTER(reg, 1) << 16) + | (READ_LITEX_SUBREGISTER(reg, 2) << 8) + | (READ_LITEX_SUBREGISTER(reg, 3)); +} + +static inline u64 litex_read64(void __iomem *reg) +{ + return ((u64)READ_LITEX_SUBREGISTER(reg, 0) << 56) + | ((u64)READ_LITEX_SUBREGISTER(reg, 1) << 48) + | ((u64)READ_LITEX_SUBREGISTER(reg, 2) << 40) + | ((u64)READ_LITEX_SUBREGISTER(reg, 3) << 32) + | ((u64)READ_LITEX_SUBREGISTER(reg, 4) << 24) + | ((u64)READ_LITEX_SUBREGISTER(reg, 5) << 16) + | ((u64)READ_LITEX_SUBREGISTER(reg, 6) << 8) + | ((u64)READ_LITEX_SUBREGISTER(reg, 7)); +} + +#endif /* _LINUX_LITEX_H */ -- cgit v1.2.3 From 267fb27352b6fc9fdbad753127a239f75618ecbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 15:50:32 +0100 Subject: perf: Reduce stack usage of perf_output_begin() __perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every call to perf_output_begin() must already have a perf_sample_data on-stack. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org --- include/linux/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..b775ae0a8c87 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1400,11 +1400,14 @@ perf_event_addr_filters(struct perf_event *event) extern void perf_event_addr_filters_sync(struct perf_event *event); extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, - unsigned int size); + struct perf_sample_data *data, + struct perf_event *event, + unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); -- cgit v1.2.3 From 76a4efa80900fc40e0fdf243b42aec9fb8c35d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:14:21 +0100 Subject: perf/arch: Remove perf_sample_data::regs_user_copy struct perf_sample_data lives on-stack, we should be careful about it's size. Furthermore, the pt_regs copy in there is only because x86_64 is a trainwreck, solve it differently. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20201030151955.258178461@infradead.org --- include/linux/perf_event.h | 6 ------ include/linux/perf_regs.h | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b775ae0a8c87..96450f6fb1de 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1022,13 +1022,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; u64 aux_size; - /* - * regs_user may point to task_pt_regs or to regs_user_copy, depending - * on arch details. - */ struct perf_regs regs_user; - struct pt_regs regs_user_copy; - struct perf_regs regs_intr; u64 stack_user_size; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 2d12e97d5e7b..f632c5725f16 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -20,8 +20,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy); + struct pt_regs *regs); #else #define PERF_REG_EXTENDED_MASK 0 @@ -42,8 +41,7 @@ static inline u64 perf_reg_abi(struct task_struct *task) } static inline void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); -- cgit v1.2.3 From 21774fd81a51ec1eccd59caf922d387977acd2aa Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 15 Oct 2020 14:57:26 -0400 Subject: kernfs: bring names in comments in line with code Fix two stragglers in the comments of the below rename operation. Fixes: adc5e8b58f48 ("kernfs: drop s_ prefix from kernfs_node members") Acked-by: Tejun Heo Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201015185726.1386868-1-willemdebruijn.kernel@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 89f6a4214a70..9e8ca8743c26 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -116,7 +116,7 @@ struct kernfs_elem_attr { * kernfs node is represented by single kernfs_node. Most fields are * private to kernfs and shouldn't be accessed directly by kernfs users. * - * As long as s_count reference is held, the kernfs_node itself is + * As long as count reference is held, the kernfs_node itself is * accessible. Dereferencing elem or any other outer entity requires * active reference. */ -- cgit v1.2.3 From 9f38abefd37af8726d59706b9b84530630b6b620 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:17 +0200 Subject: uio: fix some kernel-doc markups The definitions for (devm_)uio_register_device should be at the header file, as the macros are there. The ones inside uio.c refer, instead, to __(devm_)uio_register_device. Update them and add new kernel-doc markups for the macros. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/82ab7b68d271aeda7396e369ff8a629491b9d628.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 54bf6b118401..47c5962b876b 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -117,6 +117,14 @@ extern int __must_check struct uio_info *info); /* use a define to avoid include chaining to get THIS_MODULE */ + +/** + * uio_register_device - register a new userspace IO device + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code. + */ #define uio_register_device(parent, info) \ __uio_register_device(THIS_MODULE, parent, info) @@ -129,6 +137,14 @@ extern int __must_check struct uio_info *info); /* use a define to avoid include chaining to get THIS_MODULE */ + +/** + * devm_uio_register_device - Resource managed uio_register_device() + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code. + */ #define devm_uio_register_device(parent, info) \ __devm_uio_register_device(THIS_MODULE, parent, info) -- cgit v1.2.3 From 33c0c9bdf7a59051a654cd98b7d2b48ce0080967 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:32:56 +0200 Subject: drivers: base: fix some kernel-doc markups class_create is actually defined at the header. Fix the markup there and add a new one at the right place. While here, also fix some markups for functions that have different names between their prototypes and kernel-doc comments. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/2fb6efd6a1f90d69ff73bf579566079cbb051e15.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/class.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device/class.h b/include/linux/device/class.h index e8d470c457d1..e61ec5502019 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -256,6 +256,20 @@ extern void class_destroy(struct class *cls); /* This is a #define to keep the compiler from merging different * instances of the __key variable */ + +/** + * class_create - create a struct class structure + * @owner: pointer to the module that is to "own" this struct class + * @name: pointer to a string for the name of this class. + * + * This is used to create a struct class pointer that can then be used + * in calls to device_create(). + * + * Returns &struct class pointer on success, or ERR_PTR() on error. + * + * Note, the pointer created here is to be destroyed when finished by + * making a call to class_destroy(). + */ #define class_create(owner, name) \ ({ \ static struct lock_class_key __key; \ -- cgit v1.2.3 From a18394269fc87276963e8d965c730900178d7e4b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 7 Nov 2020 21:49:07 +0100 Subject: net: core: add dev_get_tstats64 as a ndo_get_stats64 implementation It's a frequent pattern to use netdev->stats for the less frequently accessed counters and per-cpu counters for the frequently accessed counters (rx/tx bytes/packets). Add a default ndo_get_stats64() implementation for this use case. Reviewed-by: Florian Fainelli Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a53ed2d1ed1d..7ce648a564f7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4527,6 +4527,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats); void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); +void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; -- cgit v1.2.3 From 9a2a9ebc0a758d887ee06e067e9f7f0b36ff7574 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:25:57 +0100 Subject: cpufreq: Introduce governor flags A new cpufreq governor flag will be added subsequently, so replace the bool dynamic_switching fleid in struct cpufreq_governor with a flags field and introduce CPUFREQ_GOV_DYNAMIC_SWITCHING to set for the "dynamic switching" governors instead of it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9bdfcf3c4748 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -570,12 +570,17 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - /* For governors which change frequency dynamically by themselves */ - bool dynamic_switching; struct list_head governor_list; struct module *owner; + u8 flags; }; +/* Governor flags */ + +/* For governors which change frequency dynamically by themselves */ +#define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) + + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); -- cgit v1.2.3 From 218f66870181bec7aaa6e3c72f346039c590c3c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:10 +0100 Subject: cpufreq: Introduce CPUFREQ_GOV_STRICT_TARGET Introduce a new governor flag, CPUFREQ_GOV_STRICT_TARGET, for the governors that want the target frequency to be set exactly to the given value without leaving any room for adjustments on the hardware side and set this flag for the powersave and performance governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9bdfcf3c4748..6eb9a3b8ec7b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -580,6 +580,9 @@ struct cpufreq_governor { /* For governors which change frequency dynamically by themselves */ #define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) +/* For governors wanting the target frequency to be set exactly */ +#define CPUFREQ_GOV_STRICT_TARGET BIT(1) + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, -- cgit v1.2.3 From ea9364bbadf11f0c55802cf11387d74f524cee84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:37 +0100 Subject: cpufreq: Add strict_target to struct cpufreq_policy Add a new field to be set when the CPUFREQ_GOV_STRICT_TARGET flag is set for the current governor to struct cpufreq_policy, so that the drivers needing to check CPUFREQ_GOV_STRICT_TARGET do not have to access the governor object during every frequency transition. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6eb9a3b8ec7b..acbad3b36322 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -109,6 +109,12 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Set if the CPUFREQ_GOV_STRICT_TARGET flag is set for the current + * governor. + */ + bool strict_target; + /* * Preferred average time interval between consecutive invocations of * the driver to set the frequency for this policy. To be set by the -- cgit v1.2.3 From a8b62fd0850503cf1e557d7e5a98d3f1f5c25eef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 21 Sep 2020 12:58:17 +0200 Subject: stop_machine: Add function and caller debug info Crashes in stop-machine are hard to connect to the calling code, add a little something to help with that. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.116513635@infradead.org --- include/linux/stop_machine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 76d8b09384a7..30577c3aecf8 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg); struct cpu_stop_work { struct list_head list; /* cpu_stopper->works */ cpu_stop_fn_t fn; + unsigned long caller; void *arg; struct cpu_stop_done *done; }; @@ -36,6 +37,8 @@ void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); void stop_machine_yield(const struct cpumask *cpumask); +extern void print_stop_info(const char *log_lvl, struct task_struct *task); + #else /* CONFIG_SMP */ #include @@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu, return false; } +static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { } + #endif /* CONFIG_SMP */ /* -- cgit v1.2.3 From 1cf12e08bc4d50a76b80c42a3109c53d8794a0c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2020 09:27:18 +0200 Subject: sched/hotplug: Consolidate task migration on CPU unplug With the new mechanism which kicks tasks off the outgoing CPU at the end of schedule() the situation on an outgoing CPU right before the stopper thread brings it down completely is: - All user tasks and all unbound kernel threads have either been migrated away or are not running and the next wakeup will move them to a online CPU. - All per CPU kernel threads, except cpu hotplug thread and the stopper thread have either been unbound or parked by the responsible CPU hotplug callback. That means that at the last step before the stopper thread is invoked the cpu hotplug thread is the last legitimate running task on the outgoing CPU. Add a final wait step right before the stopper thread is kicked which ensures that any still running tasks on the way to park or on the way to kick themself of the CPU are either sleeping or gone. This allows to remove the migrate_tasks() crutch in sched_cpu_dying(). If sched_cpu_dying() detects that there is still another running task aside of the stopper thread then it will explode with the appropriate fireworks. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.547163969@infradead.org --- include/linux/cpuhotplug.h | 1 + include/linux/sched/hotplug.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bc56287a1ed1..0042ef362511 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,6 +152,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 9a62ffdd296f..412cdaba33eb 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_wait_empty(unsigned int cpu); extern int sched_cpu_dying(unsigned int cpu); #else +# define sched_cpu_wait_empty NULL # define sched_cpu_dying NULL #endif -- cgit v1.2.3 From af449901b84c98cbd84a0113223ba3bcfcb12a26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Sep 2020 10:38:30 +0200 Subject: sched: Add migrate_disable() Add the base migrate_disable() support (under protest). While migrate_disable() is (currently) required for PREEMPT_RT, it is also one of the biggest flaws in the system. Notably this is just the base implementation, it is broken vs sched_setaffinity() and hotplug, both solved in additional patches for ease of review. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.818170844@infradead.org --- include/linux/preempt.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 3 +++ 2 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7d9c1c0e149c..97ba7c920653 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,6 +322,69 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + +/* + * Migrate-Disable and why it is (strongly) undesired. + * + * The premise of the Real-Time schedulers we have on Linux + * (SCHED_FIFO/SCHED_DEADLINE) is that M CPUs can/will run M tasks + * concurrently, provided there are sufficient runnable tasks, also known as + * work-conserving. For instance SCHED_DEADLINE tries to schedule the M + * earliest deadline threads, and SCHED_FIFO the M highest priority threads. + * + * The correctness of various scheduling models depends on this, but is it + * broken by migrate_disable() that doesn't imply preempt_disable(). Where + * preempt_disable() implies an immediate priority ceiling, preemptible + * migrate_disable() allows nesting. + * + * The worst case is that all tasks preempt one another in a migrate_disable() + * region and stack on a single CPU. This then reduces the available bandwidth + * to a single CPU. And since Real-Time schedulability theory considers the + * Worst-Case only, all Real-Time analysis shall revert to single-CPU + * (instantly solving the SMP analysis problem). + * + * + * The reason we have it anyway. + * + * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a + * number of primitives into becoming preemptible, they would also allow + * migration. This turns out to break a bunch of per-cpu usage. To this end, + * all these primitives employ migirate_disable() to restore this implicit + * assumption. + * + * This is a 'temporary' work-around at best. The correct solution is getting + * rid of the above assumptions and reworking the code to employ explicit + * per-cpu locking or short preempt-disable regions. + * + * The end goal must be to get rid of migrate_disable(), alternatively we need + * a schedulability theory that does not depend on abritrary migration. + * + * + * Notes on the implementation. + * + * The implementation is particularly tricky since existing code patterns + * dictate neither migrate_disable() nor migrate_enable() is allowed to block. + * This means that it cannot use cpus_read_lock() to serialize against hotplug, + * nor can it easily migrate itself into a pending affinity mask change on + * migrate_enable(). + * + * + * Note: even non-work-conserving schedulers like semi-partitioned depends on + * migration, so migrate_disable() is not only a problem for + * work-conserving schedulers. + * + */ +extern void migrate_disable(void); +extern void migrate_enable(void); + +#elif defined(CONFIG_PREEMPT_RT) + +static inline void migrate_disable(void) { } +static inline void migrate_enable(void) { } + +#else /* !CONFIG_PREEMPT_RT */ + /** * migrate_disable - Prevent migration of the current task * @@ -352,4 +415,6 @@ static __always_inline void migrate_enable(void) preempt_enable(); } +#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 063cd120b459..0732356c0eca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,9 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + int migration_disabled; +#endif #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -- cgit v1.2.3 From 6d337eab041d56bb8f0e7794f39906c21054c512 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2020 17:24:31 +0200 Subject: sched: Fix migrate_disable() vs set_cpus_allowed_ptr() Concurrent migrate_disable() and set_cpus_allowed_ptr() has interesting features. We rely on set_cpus_allowed_ptr() to not return until the task runs inside the provided mask. This expectation is exported to userspace. This means that any set_cpus_allowed_ptr() caller must wait until migrate_enable() allows migrations. At the same time, we don't want migrate_enable() to schedule, due to patterns like: preempt_disable(); migrate_disable(); ... migrate_enable(); preempt_enable(); And: raw_spin_lock(&B); spin_unlock(&A); this means that when migrate_enable() must restore the affinity mask, it cannot wait for completion thereof. Luck will have it that that is exactly the case where there is a pending set_cpus_allowed_ptr(), so let that provide storage for the async stop machine. Much thanks to Valentin who used TLA+ most effective and found lots of 'interesting' cases. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.921768277@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0732356c0eca..90a0c92741d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,7 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; + void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) int migration_disabled; #endif -- cgit v1.2.3 From 14e292f8d45380c519a83d9b0f37089a17eedcdf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Oct 2020 15:54:14 +0200 Subject: sched,rt: Use cpumask_any*_distribute() Replace a bunch of cpumask_any*() instances with cpumask_any*_distribute(), by injecting this little bit of random in cpu selection, we reduce the chance two competing balance operations working off the same lowest_mask pick the same CPU. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102347.190759694@infradead.org --- include/linux/cpumask.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f0d895d6ac39..383684e30f12 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p, return cpumask_next_and(-1, src1p, src2p); } +static inline int cpumask_any_distribute(const struct cpumask *srcp) +{ + return cpumask_first(srcp); +} + #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ @@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); +int cpumask_any_distribute(const struct cpumask *srcp); /** * for_each_cpu - iterate over every cpu in a mask -- cgit v1.2.3 From a7c81556ec4d341dfdbf2cc478ead89d73e474a7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2020 17:06:07 +0200 Subject: sched: Fix migrate_disable() vs rt/dl balancing In order to minimize the interference of migrate_disable() on lower priority tasks, which can be deprived of runtime due to being stuck below a higher priority task. Teach the RT/DL balancers to push away these higher priority tasks when a lower priority task gets selected to run on a freshly demoted CPU (pull). This adds migration interference to the higher priority task, but restores bandwidth to system that would otherwise be irrevocably lost. Without this it would be possible to have all tasks on the system stuck on a single CPU, each task preempted in a migrate_disable() section with a single high priority task running. This way we can still approximate running the M highest priority tasks on the system. Migrating the top task away is (ofcourse) still subject to migrate_disable() too, which means the lower task is subject to an interference equivalent to the worst case migrate_disable() section. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102347.499155098@infradead.org --- include/linux/preempt.h | 40 ++++++++++++++++++++++------------------ include/linux/sched.h | 3 ++- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 97ba7c920653..8b43922e65df 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -325,24 +325,28 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) /* - * Migrate-Disable and why it is (strongly) undesired. - * - * The premise of the Real-Time schedulers we have on Linux - * (SCHED_FIFO/SCHED_DEADLINE) is that M CPUs can/will run M tasks - * concurrently, provided there are sufficient runnable tasks, also known as - * work-conserving. For instance SCHED_DEADLINE tries to schedule the M - * earliest deadline threads, and SCHED_FIFO the M highest priority threads. - * - * The correctness of various scheduling models depends on this, but is it - * broken by migrate_disable() that doesn't imply preempt_disable(). Where - * preempt_disable() implies an immediate priority ceiling, preemptible - * migrate_disable() allows nesting. - * - * The worst case is that all tasks preempt one another in a migrate_disable() - * region and stack on a single CPU. This then reduces the available bandwidth - * to a single CPU. And since Real-Time schedulability theory considers the - * Worst-Case only, all Real-Time analysis shall revert to single-CPU - * (instantly solving the SMP analysis problem). + * Migrate-Disable and why it is undesired. + * + * When a preempted task becomes elegible to run under the ideal model (IOW it + * becomes one of the M highest priority tasks), it might still have to wait + * for the preemptee's migrate_disable() section to complete. Thereby suffering + * a reduction in bandwidth in the exact duration of the migrate_disable() + * section. + * + * Per this argument, the change from preempt_disable() to migrate_disable() + * gets us: + * + * - a higher priority tasks gains reduced wake-up latency; with preempt_disable() + * it would have had to wait for the lower priority task. + * + * - a lower priority tasks; which under preempt_disable() could've instantly + * migrated away when another CPU becomes available, is now constrained + * by the ability to push the higher priority task away, which might itself be + * in a migrate_disable() section, reducing it's available bandwidth. + * + * IOW it trades latency / moves the interference term, but it stays in the + * system, and as long as it remains unbounded, the system is not fully + * deterministic. * * * The reason we have it anyway. diff --git a/include/linux/sched.h b/include/linux/sched.h index 90a0c92741d7..3af9d52fe093 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -716,8 +716,9 @@ struct task_struct { cpumask_t cpus_mask; void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) - int migration_disabled; + unsigned short migration_disabled; #endif + unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -- cgit v1.2.3 From c250d50fe2ce627ca9805d9c8ac11cbbf922a4a6 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 5 Nov 2020 12:50:01 +0000 Subject: PM: EM: Add a flag indicating units of power values in Energy Model There are different platforms and devices which might use different scale for the power values. Kernel sub-systems might need to check if all Energy Model (EM) devices are using the same scale. Address that issue and store the information inside EM for each device. Thanks to that they can be easily compared and proper action triggered. Suggested-by: Daniel Lezcano Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b67a51c574b9..3a33c738d876 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -29,6 +29,8 @@ struct em_perf_state { * em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states + * @milliwatts: Flag indicating the power values are in milli-Watts + * or some other scale. * @cpus: Cpumask covering the CPUs of the domain. It's here * for performance reasons to avoid potential cache * misses during energy calculations in the scheduler @@ -43,6 +45,7 @@ struct em_perf_state { struct em_perf_domain { struct em_perf_state *table; int nr_perf_states; + int milliwatts; unsigned long cpus[]; }; @@ -79,7 +82,8 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span); + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -186,7 +190,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span) + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts) { return -EINVAL; } -- cgit v1.2.3 From f2c90b12e700fff6a0b5a1c32f446f05f9d0890c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 3 Nov 2020 09:05:59 +0000 Subject: PM: EM: update the comments related to power scale The Energy Model supports power values expressed in milli-Watts or in an 'abstract scale'. Update the related comments is the code to reflect that state. Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 3a33c738d876..9618c0a46ef4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -13,9 +13,8 @@ /** * em_perf_state - Performance state of a performance domain * @frequency: The frequency in KHz, for consistency with CPUFreq - * @power: The power consumed at this level, in milli-watts (by 1 CPU or - by a registered device). It can be a total power: static and - dynamic. + * @power: The power consumed at this level (by 1 CPU or by a registered + * device). It can be a total power: static and dynamic. * @cost: The cost coefficient associated with this level, used during * energy calculation. Equal to: power * max_frequency / frequency */ @@ -58,7 +57,7 @@ struct em_data_callback { /** * active_power() - Provide power at the next performance state of * a device - * @power : Active power at the performance state in mW + * @power : Active power at the performance state * (modified) * @freq : Frequency at the performance state in kHz * (modified) @@ -69,8 +68,8 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-watts. It is expected to fit in the - * [0, EM_MAX_POWER] range. + * expressed in milli-Watts or an abstract scale. It is expected to + * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. */ -- cgit v1.2.3 From fc51989062138744b56e47190915ce68484e73f3 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 3 Nov 2020 16:06:25 +0100 Subject: PM: domains: Rename pm_genpd_syscore_poweroff|poweron() To better describe what the pm_genpd_syscore_poweroff|poweron() functions actually do, let's rename them to dev_pm_genpd_suspend|resume() and update the rather few callers of them accordingly (a couple of clocksource drivers). Moreover, let's take the opportunity to add some documentation of these exported functions, as that is currently missing. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ad0ec481416..a8f93328daec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -280,11 +280,11 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev) #endif #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -void pm_genpd_syscore_poweroff(struct device *dev); -void pm_genpd_syscore_poweron(struct device *dev); +void dev_pm_genpd_suspend(struct device *dev); +void dev_pm_genpd_resume(struct device *dev); #else -static inline void pm_genpd_syscore_poweroff(struct device *dev) {} -static inline void pm_genpd_syscore_poweron(struct device *dev) {} +static inline void dev_pm_genpd_suspend(struct device *dev) {} +static inline void dev_pm_genpd_resume(struct device *dev) {} #endif /* OF PM domain providers */ -- cgit v1.2.3 From 36e68442d1afd4f720704ee1ea8486331507e834 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Nov 2020 17:19:31 -0800 Subject: bpf: Load and verify kernel module BTFs Add kernel module listener that will load/validate and unload module BTF. Module BTFs gets ID generated for them, which makes it possible to iterate them with existing BTF iteration API. They are given their respective module's names, which will get reported through GET_OBJ_INFO API. They are also marked as in-kernel BTFs for tooling to distinguish them from user-provided BTFs. Also, similarly to vmlinux BTF, kernel module BTFs are exposed through sysfs as /sys/kernel/btf/. This is convenient for user-space tools to inspect module BTF contents and dump their types with existing tools: [vmuser@archvm bpf]$ ls -la /sys/kernel/btf total 0 drwxr-xr-x 2 root root 0 Nov 4 19:46 . drwxr-xr-x 13 root root 0 Nov 4 19:46 .. ... -r--r--r-- 1 root root 888 Nov 4 19:46 irqbypass -r--r--r-- 1 root root 100225 Nov 4 19:46 kvm -r--r--r-- 1 root root 35401 Nov 4 19:46 kvm_intel -r--r--r-- 1 root root 120 Nov 4 19:46 pcspkr -r--r--r-- 1 root root 399 Nov 4 19:46 serio_raw -r--r--r-- 1 root root 4094095 Nov 4 19:46 vmlinux Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/bpf/20201110011932.3201430-5-andrii@kernel.org --- include/linux/bpf.h | 2 ++ include/linux/module.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 73d5381a5d5c..581b2a2e78eb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,9 +36,11 @@ struct seq_operations; struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; +struct kobject; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; +extern struct kobject *btf_kobj; typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); diff --git a/include/linux/module.h b/include/linux/module.h index a29187f7c360..20fce258ffba 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -475,6 +475,10 @@ struct module { unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + unsigned int btf_data_size; + void *btf_data; +#endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; -- cgit v1.2.3 From 8a3c84b649b033024d2349f96234b26cbd6083a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: separate __sb_start_write into blocking and non-blocking helpers Break this function into two helpers so that it's obvious that the trylock versions return a value that must be checked, and the blocking versions don't require that. While we're at it, clean up the return type mismatch. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..305989afd49c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1581,7 +1581,8 @@ extern struct timespec64 current_time(struct inode *inode); */ void __sb_end_write(struct super_block *sb, int level); -int __sb_start_write(struct super_block *sb, int level, bool wait); +void __sb_start_write(struct super_block *sb, int level); +bool __sb_start_write_trylock(struct super_block *sb, int level); #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) @@ -1645,12 +1646,12 @@ static inline void sb_end_intwrite(struct super_block *sb) */ static inline void sb_start_write(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_WRITE, true); + __sb_start_write(sb, SB_FREEZE_WRITE); } -static inline int sb_start_write_trylock(struct super_block *sb) +static inline bool sb_start_write_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_WRITE, false); + return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** @@ -1674,7 +1675,7 @@ static inline int sb_start_write_trylock(struct super_block *sb) */ static inline void sb_start_pagefault(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); + __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /* @@ -1692,12 +1693,12 @@ static inline void sb_start_pagefault(struct super_block *sb) */ static inline void sb_start_intwrite(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_FS, true); + __sb_start_write(sb, SB_FREEZE_FS); } -static inline int sb_start_intwrite_trylock(struct super_block *sb) +static inline bool sb_start_intwrite_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_FS, false); + return __sb_start_write_trylock(sb, SB_FREEZE_FS); } @@ -2756,14 +2757,14 @@ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; - return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); + return sb_start_write_trylock(file_inode(file)->i_sb); } static inline void file_end_write(struct file *file) -- cgit v1.2.3 From 9b8523423b23ee3dfd88e32f5b7207be56a4e782 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: move __sb_{start,end}_write* to fs.h Now that we've straightened out the callers, move these three functions to fs.h since they're fairly trivial. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- include/linux/fs.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 305989afd49c..6dabd019cab0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,9 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode); * Snapshotting support. */ -void __sb_end_write(struct super_block *sb, int level); -void __sb_start_write(struct super_block *sb, int level); -bool __sb_start_write_trylock(struct super_block *sb, int level); +/* + * These are internal functions, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +static inline void __sb_end_write(struct super_block *sb, int level) +{ + percpu_up_read(sb->s_writers.rw_sem + level-1); +} + +static inline void __sb_start_write(struct super_block *sb, int level) +{ + percpu_down_read(sb->s_writers.rw_sem + level - 1); +} + +static inline bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) -- cgit v1.2.3 From 60602cb549f1965a7edbc96026760dfb93911fab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 08:19:24 -0400 Subject: fgraph: Make overruns 4 bytes in graph stack structure Inspecting the data structures of the function graph tracer, I found that the overrun value is unsigned long, which is 8 bytes on a 64 bit machine, and not only that, the depth is an int (4 bytes). The overrun can be simply an unsigned int (4 bytes) and pack the ftrace_graph_ret structure better. The depth is moved up next to the func, as it is used more often with func, and improves cache locality. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 806196345c3f..8dde9c17aaa5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -864,11 +864,11 @@ struct ftrace_graph_ent { */ struct ftrace_graph_ret { unsigned long func; /* Current function */ + int depth; /* Number of functions that overran the depth limit for current task */ - unsigned long overrun; + unsigned int overrun; unsigned long long calltime; unsigned long long rettime; - int depth; } __packed; /* Type of the callback handlers for tracing function graph*/ -- cgit v1.2.3 From 7b68621f8d16689cbb4203aceaca86ffb165f1d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 30 Oct 2020 17:21:00 -0400 Subject: ftrace: Clean up the recursion code a bit In trace_test_and_set_recursion(), current->trace_recursion is placed into a variable, and that variable should be used for the processing, as there's no reason to dereference current multiple times. On trace_clear_recursion(), current->trace_recursion is modified and there's no reason to copy it over to a variable. Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 228cc56ed66e..a9f9c5714e65 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -162,7 +162,7 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start, int max) { - unsigned int val = current->trace_recursion; + unsigned int val = READ_ONCE(current->trace_recursion); int bit; /* A previous recursion check was made */ @@ -176,18 +176,15 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign * a switch between contexts. Allow for a single recursion. */ bit = TRACE_TRANSITION_BIT; - if (trace_recursion_test(bit)) { + if (val & (1 << bit)) { do_ftrace_record_recursion(ip, pip); return -1; } - trace_recursion_set(bit); - barrier(); - return bit + 1; + } else { + /* Normal check passed, clear the transition to allow it again */ + val &= ~(1 << TRACE_TRANSITION_BIT); } - /* Normal check passed, clear the transition to allow it again */ - trace_recursion_clear(TRACE_TRANSITION_BIT); - val |= 1 << bit; current->trace_recursion = val; barrier(); @@ -197,17 +194,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign static __always_inline void trace_clear_recursion(int bit) { - unsigned int val = current->trace_recursion; - if (!bit) return; - bit--; - bit = 1 << bit; - val &= ~bit; - barrier(); - current->trace_recursion = val; + bit--; + trace_recursion_clear(bit); } /** -- cgit v1.2.3 From 4210d50f0b3e423e10a7a254b2a67f5c5318868e Mon Sep 17 00:00:00 2001 From: Isaac Hazan Date: Thu, 24 Sep 2020 11:43:58 +0300 Subject: thunderbolt: Add link_speed and link_width to XDomain Link speed and link width are needed for checking expected values in case of using a loopback service. Signed-off-by: Isaac Hazan Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 5db2b11ab085..e441af88ed77 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -179,6 +179,8 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @lock: Lock to serialize access to the following fields of this structure * @vendor_name: Name of the vendor (or %NULL if not known) * @device_name: Name of the device (or %NULL if not known) + * @link_speed: Speed of the link in Gb/s + * @link_width: Width of the link (1 or 2) * @is_unplugged: The XDomain is unplugged * @resume: The XDomain is being resumed * @needs_uuid: If the XDomain does not have @remote_uuid it will be @@ -223,6 +225,8 @@ struct tb_xdomain { struct mutex lock; const char *vendor_name; const char *device_name; + unsigned int link_speed; + unsigned int link_width; bool is_unplugged; bool resume; bool needs_uuid; -- cgit v1.2.3 From 5cc0df9ce10a860aaeac53f8df1cc8754c5c7b03 Mon Sep 17 00:00:00 2001 From: Isaac Hazan Date: Thu, 24 Sep 2020 11:44:01 +0300 Subject: thunderbolt: Add functions for enabling and disabling lane bonding on XDomain These can be used by service drivers to enable and disable lane bonding as needed. Signed-off-by: Isaac Hazan Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index e441af88ed77..0a747f92847e 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -247,6 +247,8 @@ struct tb_xdomain { u8 depth; }; +int tb_xdomain_lane_bonding_enable(struct tb_xdomain *xd); +void tb_xdomain_lane_bonding_disable(struct tb_xdomain *xd); int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, u16 transmit_ring, u16 receive_path, u16 receive_ring); -- cgit v1.2.3 From 407ac931aefda91ac90498c6b6e6893982173613 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 7 Oct 2020 17:53:44 +0300 Subject: thunderbolt: Create debugfs directory automatically for services This allows service drivers to use it as parent directory if they need to add their own debugfs entries. Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 0a747f92847e..a844fd5d96ab 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -350,6 +350,9 @@ void tb_unregister_protocol_handler(struct tb_protocol_handler *handler); * @prtcvers: Protocol version from the properties directory * @prtcrevs: Protocol software revision from the properties directory * @prtcstns: Protocol settings mask from the properties directory + * @debugfs_dir: Pointer to the service debugfs directory. Always created + * when debugfs is enabled. Can be used by service drivers to + * add their own entries under the service. * * Each domain exposes set of services it supports as collection of * properties. For each service there will be one corresponding @@ -363,6 +366,7 @@ struct tb_service { u32 prtcvers; u32 prtcrevs; u32 prtcstns; + struct dentry *debugfs_dir; }; static inline struct tb_service *tb_service_get(struct tb_service *svc) -- cgit v1.2.3 From afe704a2d0618ebdb559b5ddb059f6cdbfc78783 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 19 Oct 2020 19:15:20 +0300 Subject: thunderbolt: Add support for end-to-end flow control USB4 spec defines end-to-end (E2E) flow control that can be used between hosts to prevent overflow of a RX ring. We previously had this partially implemented but that code was removed with commit 53f13319d131 ("thunderbolt: Get rid of E2E workaround") with the idea that we add it back properly if there ever is need. Now that we are going to add DMA traffic test driver (in subsequent patches) this can be useful. For this reason we modify tb_ring_alloc_rx/tx() so that they accept RING_FLAG_E2E and configure the hardware ring accordingly. The RX side also requires passing TX HopID (e2e_tx_hop) used in the credit grant packets. Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index a844fd5d96ab..034dccf93955 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -481,6 +481,8 @@ struct tb_nhi { * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) * @flags: Ring specific flags + * @e2e_tx_hop: Transmit HopID when E2E is enabled. Only applicable to + * RX ring. For TX ring this should be set to %0. * @sof_mask: Bit mask used to detect start of frame PDF * @eof_mask: Bit mask used to detect end of frame PDF * @start_poll: Called when ring interrupt is triggered to start @@ -504,6 +506,7 @@ struct tb_ring { int irq; u8 vector; unsigned int flags; + int e2e_tx_hop; u16 sof_mask; u16 eof_mask; void (*start_poll)(void *data); @@ -514,6 +517,8 @@ struct tb_ring { #define RING_FLAG_NO_SUSPEND BIT(0) /* Configure the ring to be in frame mode */ #define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) struct ring_frame; typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); @@ -562,7 +567,8 @@ struct ring_frame { struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask, + unsigned int flags, int e2e_tx_hop, + u16 sof_mask, u16 eof_mask, void (*start_poll)(void *), void *poll_data); void tb_ring_start(struct tb_ring *ring); void tb_ring_stop(struct tb_ring *ring); -- cgit v1.2.3 From 029b42d8519cef70c4fb5fcaccd08f1053ed2bf0 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 27 Oct 2020 10:57:23 +0100 Subject: spi: introduce SPI_MODE_X_MASK macro Provide a macro to filter all SPI_MODE_0,1,2,3 mode in one run. The latest SPI framework will parse the devicetree in following call sequence: of_register_spi_device() -> of_spi_parse_dt() So, driver do not need to pars the devicetree and will get prepared flags in the probe. On one hand it is good far most drivers. On other hand some drivers need to filter flags provide by SPI framework and apply know to work flags. This drivers may use SPI_MODE_X_MASK to filter MODE flags and set own, known flags: spi->flags &= ~SPI_MODE_X_MASK; spi->flags |= SPI_MODE_0; Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20201027095724.18654-2-o.rempel@pengutronix.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 99380c0825db..8097f27702f3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -171,6 +171,7 @@ struct spi_device { #define SPI_MODE_1 (0|SPI_CPHA) #define SPI_MODE_2 (SPI_CPOL|0) #define SPI_MODE_3 (SPI_CPOL|SPI_CPHA) +#define SPI_MODE_X_MASK (SPI_CPOL|SPI_CPHA) #define SPI_CS_HIGH 0x04 /* chipselect active high? */ #define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */ #define SPI_3WIRE 0x10 /* SI/SO signals shared */ -- cgit v1.2.3 From 476b485be03c37212fcf10d85510aae5d34316ef Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 10 Nov 2020 13:41:17 -0800 Subject: dma-buf: Document that dma-buf size is fixed The fact that the size of dma-buf is invariant over the lifetime of the buffer is mentioned in the comment of 'dma_buf_ops.mmap', but is not documented at where the info is defined. Add the missing documentation. Signed-off-by: Jianxin Xiong Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1605044477-51833-7-git-send-email-jianxin.xiong@intel.com --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 03875eaed51a..cf72699cb2bc 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -273,7 +273,7 @@ struct dma_buf_ops { /** * struct dma_buf - shared buffer object - * @size: size of the buffer + * @size: size of the buffer; invariant over the lifetime of the buffer. * @file: file pointer used for sharing buffers across, and for refcounting. * @attachments: list of dma_buf_attachment that denotes all devices attached, * protected by dma_resv lock. @@ -405,7 +405,7 @@ struct dma_buf_attachment { * @exp_name: name of the exporter - useful for debugging. * @owner: pointer to exporter module - used for refcounting kernel module * @ops: Attach allocator-defined dma buf ops to the new buffer - * @size: Size of the buffer + * @size: Size of the buffer - invariant over the lifetime of the buffer * @flags: mode flags for the file * @resv: reservation-object, NULL to allocate default one * @priv: Attach private data of allocator to this buffer -- cgit v1.2.3 From c0c5a60f0f1311bcf08bbe735122096d6326fb5b Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:13 +0100 Subject: net: evaluate net.ipvX.conf.all.ignore_routes_with_linkdown Introduced in 0eeb075fad73, the "ignore_routes_with_linkdown" sysctl ignores a route whose interface is down. It is provided as a per-interface sysctl. However, while a "all" variant is exposed, it was a noop since it was never evaluated. We use the usual "or" logic for this kind of sysctls. Tested with: ip link add type veth # veth0 + veth1 ip link add type veth # veth1 + veth2 ip link set up dev veth0 ip link set up dev veth1 # link-status paired with veth0 ip link set up dev veth2 ip link set up dev veth3 # link-status paired with veth2 # First available path ip -4 addr add 203.0.113.${uts#H}/24 dev veth0 ip -6 addr add 2001:db8:1::${uts#H}/64 dev veth0 # Second available path ip -4 addr add 192.0.2.${uts#H}/24 dev veth2 ip -6 addr add 2001:db8:2::${uts#H}/64 dev veth2 # More specific route through first path ip -4 route add 198.51.100.0/25 via 203.0.113.254 # via veth0 ip -6 route add 2001:db8:3::/56 via 2001:db8:1::ff # via veth0 # Less specific route through second path ip -4 route add 198.51.100.0/24 via 192.0.2.254 # via veth2 ip -6 route add 2001:db8:3::/48 via 2001:db8:2::ff # via veth2 # H1: enable on "all" # H2: enable on "veth0" for v in ipv4 ipv6; do case $uts in H1) sysctl -qw net.${v}.conf.all.ignore_routes_with_linkdown=1 ;; H2) sysctl -qw net.${v}.conf.veth0.ignore_routes_with_linkdown=1 ;; esac done set -xe # When veth0 is up, best route is through veth0 ip -o route get 198.51.100.1 | grep -Fw veth0 ip -o route get 2001:db8:3::1 | grep -Fw veth0 # When veth0 is down, best route should be through veth2 on H1/H2, # but on veth0 on H2 ip link set down dev veth1 # down veth0 ip route show [ $uts != H3 ] || ip -o route get 198.51.100.1 | grep -Fw veth0 [ $uts != H3 ] || ip -o route get 2001:db8:3::1 | grep -Fw veth0 [ $uts = H3 ] || ip -o route get 198.51.100.1 | grep -Fw veth2 [ $uts = H3 ] || ip -o route get 2001:db8:3::1 | grep -Fw veth2 Without this patch, the two last lines would fail on H1 (the one using the "all" sysctl). With the patch, everything succeeds as expected. Also document the sysctl in `ip-sysctl.rst`. Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down") Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3515ca64e638..3bbcddd22df8 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -126,7 +126,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ - IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) -- cgit v1.2.3 From 1af5318c00a8acc33a90537af49b3f23f72a2c4b Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:14 +0100 Subject: net: evaluate net.ipv4.conf.all.proxy_arp_pvlan Introduced in 65324144b50b, the "proxy_arp_vlan" sysctl is a per-interface sysctl to tune proxy ARP support for private VLANs. While the "all" variant is exposed, it was a noop and never evaluated. We use the usual "or" logic for this kind of sysctls. Fixes: 65324144b50b ("net: RFC3069, private VLAN proxy arp support") Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3bbcddd22df8..53aa0343bf69 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -105,7 +105,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) -#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ -- cgit v1.2.3 From 1f78ae99790812481b7944cf40008aed5fd2ef18 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 10 Nov 2020 18:48:40 -0300 Subject: serial: imx: Remove unused platform data support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 5.10-rc1 i.MX is a devicetree-only platform and the existing platform data support in this driver was only useful for old non-devicetree platforms. Get rid of the platform data support since it is no longer used. Reviewed-by: Fugang Duan Acked-by: Uwe Kleine-König Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20201110214840.16768-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/serial-imx.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/serial-imx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/serial-imx.h b/include/linux/platform_data/serial-imx.h deleted file mode 100644 index 0844b21372c7..000000000000 --- a/include/linux/platform_data/serial-imx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2008 by Sascha Hauer - */ - -#ifndef ASMARM_ARCH_UART_H -#define ASMARM_ARCH_UART_H - -#define IMXUART_HAVE_RTSCTS (1<<0) - -struct imxuart_platform_data { - unsigned int flags; -}; - -#endif -- cgit v1.2.3 From 5e844cc37a5cbaa460e68f9a989d321d63088a89 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:10 +0100 Subject: spi: Introduce device-managed SPI controller allocation SPI driver probing currently comprises two steps, whereas removal comprises only one step: spi_alloc_master() spi_register_controller() spi_unregister_controller() That's because spi_unregister_controller() calls device_unregister() instead of device_del(), thereby releasing the reference on the spi_controller which was obtained by spi_alloc_master(). An SPI driver's private data is contained in the same memory allocation as the spi_controller struct. Thus, once spi_unregister_controller() has been called, the private data is inaccessible. But some drivers need to access it after spi_unregister_controller() to perform further teardown steps. Introduce devm_spi_alloc_master() and devm_spi_alloc_slave(), which release a reference on the spi_controller struct only after the driver has unbound, thereby keeping the memory allocation accessible. Change spi_unregister_controller() to not release a reference if the spi_controller was allocated by one of these new devm functions. The present commit is small enough to be backportable to stable. It allows fixing drivers which use the private data in their ->remove() hook after it's been freed. It also allows fixing drivers which neglect to release a reference on the spi_controller in the probe error path. Long-term, most SPI drivers shall be moved over to the devm functions introduced herein. The few that can't shall be changed in a treewide commit to explicitly release the last reference on the controller. That commit shall amend spi_unregister_controller() to no longer release a reference, thereby completing the migration. As a result, the behaviour will be less surprising and more consistent with subsystems such as IIO, which also includes the private data in the allocation of the generic iio_dev struct, but calls device_del() in iio_device_unregister(). Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/272bae2ef08abd21388c98e23729886663d19192.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 99380c0825db..b390fdac1587 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -734,6 +734,25 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +struct spi_controller *__devm_spi_alloc_controller(struct device *dev, + unsigned int size, + bool slave); + +static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, + unsigned int size) +{ + return __devm_spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __devm_spi_alloc_controller(dev, size, true); +} + extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); -- cgit v1.2.3 From cd2c40ff90b0e385c18f881ab5e17f7137864223 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:36 -0700 Subject: platform/chrome: cros_ec: Import Type C host commands Import the EC_CMD_TYPEC_STATUS and EC_CMD_TYPEC_DISCOVERY Chrome OS EC host commands from the EC code base [1]. These commands can be used by the application processor to query Power Delivery (PD) discovery information concerning connected Type C peripherals. Also add the EC_FEATURE_TYPEC_CMD feature flag, which is used to determine whether these commands are supported by the EC. [1]: https://chromium.googlesource.com/chromiumos/platform/ec/+/refs/heads/master/include/ec_commands.h Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-5-pmalani@chromium.org --- include/linux/platform_data/cros_ec_commands.h | 155 +++++++++++++++++++++++++ 1 file changed, 155 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1fcfe9e63cb9..7f54fdcdd8cb 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1284,6 +1284,8 @@ enum ec_feature_code { EC_FEATURE_SCP = 39, /* The MCU is an Integrated Sensor Hub */ EC_FEATURE_ISH = 40, + /* New TCPMv2 TYPEC_ prefaced commands supported */ + EC_FEATURE_TYPEC_CMD = 41, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) @@ -5528,6 +5530,159 @@ struct ec_response_regulator_get_voltage { uint32_t voltage_mv; } __ec_align4; +/* + * Gather all discovery information for the given port and partner type. + * + * Note that if discovery has not yet completed, only the currently completed + * responses will be filled in. If the discovery data structures are changed + * in the process of the command running, BUSY will be returned. + * + * VDO field sizes are set to the maximum possible number of VDOs a VDM may + * contain, while the number of SVIDs here is selected to fit within the PROTO2 + * maximum parameter size. + */ +#define EC_CMD_TYPEC_DISCOVERY 0x0131 + +enum typec_partner_type { + TYPEC_PARTNER_SOP = 0, + TYPEC_PARTNER_SOP_PRIME = 1, +}; + +struct ec_params_typec_discovery { + uint8_t port; + uint8_t partner_type; /* enum typec_partner_type */ +} __ec_align1; + +struct svid_mode_info { + uint16_t svid; + uint16_t mode_count; /* Number of modes partner sent */ + uint32_t mode_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ +}; + +struct ec_response_typec_discovery { + uint8_t identity_count; /* Number of identity VDOs partner sent */ + uint8_t svid_count; /* Number of SVIDs partner sent */ + uint16_t reserved; + uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ + struct svid_mode_info svids[0]; +} __ec_align1; + +/* + * Gather all status information for a port. + * + * Note: this covers many of the return fields from the deprecated + * EC_CMD_USB_PD_CONTROL command, except those that are redundant with the + * discovery data. The "enum pd_cc_states" is defined with the deprecated + * EC_CMD_USB_PD_CONTROL command. + * + * This also combines in the EC_CMD_USB_PD_MUX_INFO flags. + */ +#define EC_CMD_TYPEC_STATUS 0x0133 + +/* + * Power role. + * + * Note this is also used for PD header creation, and values align to those in + * the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.4 Port Power Role). + */ +enum pd_power_role { + PD_ROLE_SINK = 0, + PD_ROLE_SOURCE = 1 +}; + +/* + * Data role. + * + * Note this is also used for PD header creation, and the first two values + * align to those in the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.6 Port Data Role). + */ +enum pd_data_role { + PD_ROLE_UFP = 0, + PD_ROLE_DFP = 1, + PD_ROLE_DISCONNECTED = 2, +}; + +enum pd_vconn_role { + PD_ROLE_VCONN_OFF = 0, + PD_ROLE_VCONN_SRC = 1, +}; + +/* + * Note: BIT(0) may be used to determine whether the polarity is CC1 or CC2, + * regardless of whether a debug accessory is connected. + */ +enum tcpc_cc_polarity { + /* + * _CCx: is used to indicate the polarity while not connected to + * a Debug Accessory. Only one CC line will assert a resistor and + * the other will be open. + */ + POLARITY_CC1 = 0, + POLARITY_CC2 = 1, + + /* + * _CCx_DTS is used to indicate the polarity while connected to a + * SRC Debug Accessory. Assert resistors on both lines. + */ + POLARITY_CC1_DTS = 2, + POLARITY_CC2_DTS = 3, + + /* + * The current TCPC code relies on these specific POLARITY values. + * Adding in a check to verify if the list grows for any reason + * that this will give a hint that other places need to be + * adjusted. + */ + POLARITY_COUNT +}; + +#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) +#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) + +struct ec_params_typec_status { + uint8_t port; +} __ec_align1; + +struct ec_response_typec_status { + uint8_t pd_enabled; /* PD communication enabled - bool */ + uint8_t dev_connected; /* Device connected - bool */ + uint8_t sop_connected; /* Device is SOP PD capable - bool */ + uint8_t source_cap_count; /* Number of Source Cap PDOs */ + + uint8_t power_role; /* enum pd_power_role */ + uint8_t data_role; /* enum pd_data_role */ + uint8_t vconn_role; /* enum pd_vconn_role */ + uint8_t sink_cap_count; /* Number of Sink Cap PDOs */ + + uint8_t polarity; /* enum tcpc_cc_polarity */ + uint8_t cc_state; /* enum pd_cc_states */ + uint8_t dp_pin; /* DP pin mode (MODE_DP_IN_[A-E]) */ + uint8_t mux_state; /* USB_PD_MUX* - encoded mux state */ + + char tc_state[32]; /* TC state name */ + + uint32_t events; /* PD_STATUS_EVENT bitmask */ + + /* + * BCD PD revisions for partners + * + * The format has the PD major reversion in the upper nibble, and PD + * minor version in the next nibble. Following two nibbles are + * currently 0. + * ex. PD 3.2 would map to 0x3200 + * + * PD major/minor will be 0 if no PD device is connected. + */ + uint16_t sop_revision; + uint16_t sop_prime_revision; + + uint32_t source_cap_pdos[7]; /* Max 7 PDOs can be present */ + + uint32_t sink_cap_pdos[7]; /* Max 7 PDOs can be present */ +} __ec_align1; + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ -- cgit v1.2.3 From 7e890c37c25c7cbca37ff0ab292873d8146e713b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Nov 2020 17:50:04 +0100 Subject: block: add a return value to set_capacity_revalidate_and_notify Return if the function ended up sending an uevent or not. Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Christoph Hellwig Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 38f23d757013..03da3f603d30 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); /* drivers/char/random.c */ -- cgit v1.2.3 From af0c351cc34857ad7b254850b9392d99da46be9e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 10 Nov 2020 20:50:02 +0100 Subject: usbnet: switch to core handling of rx/tx byte/packet counters Use netdev->tstats instead of a member of usbnet for storing a pointer to the per-cpu counters. This allows us to use core functionality for statistics handling. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 2e4f7721fc4e..1f6dfa977e7f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -65,8 +65,6 @@ struct usbnet { struct usb_anchor deferred; struct tasklet_struct bh; - struct pcpu_sw_netstats __percpu *stats64; - struct work_struct kevent; unsigned long flags; # define EVENT_TX_HALT 0 @@ -285,7 +283,7 @@ extern int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags); extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); -extern void usbnet_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); + +#define usbnet_get_stats64 dev_get_tstats64 #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From 323955a0498ccaa1263e369b91efd8f4310768b6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 10 Nov 2020 20:51:03 +0100 Subject: net: usb: switch to dev_get_tstats64 and remove usbnet_get_stats64 alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace usbnet_get_stats64() with new identical core function dev_get_tstats64() in all users and remove usbnet_get_stats64(). Signed-off-by: Heiner Kallweit Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 1f6dfa977e7f..88a7673894d5 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -284,6 +284,4 @@ extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); -#define usbnet_get_stats64 dev_get_tstats64 - #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From 6d94e741a8ff818e5518da8257f5ca0aaed1f269 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Nov 2020 19:12:11 -0800 Subject: bpf: Support for pointers beyond pkt_end. This patch adds the verifier support to recognize inlined branch conditions. The LLVM knows that the branch evaluates to the same value, but the verifier couldn't track it. Hence causing valid programs to be rejected. The potential LLVM workaround: https://reviews.llvm.org/D87428 can have undesired side effects, since LLVM doesn't know that skb->data/data_end are being compared. LLVM has to introduce extra boolean variable and use inline_asm trick to force easier for the verifier assembly. Instead teach the verifier to recognize that r1 = skb->data; r1 += 10; r2 = skb->data_end; if (r1 > r2) { here r1 points beyond packet_end and subsequent if (r1 > r2) // always evaluates to "true". } Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201111031213.25109-2-alexei.starovoitov@gmail.com --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e83ef6f6bf43..306869d4743b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,7 @@ struct bpf_reg_state { enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ - u16 range; + int range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL -- cgit v1.2.3 From 8d0dd23c6c78d140ed2132f523592ddb4cea839f Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 12 Nov 2020 16:56:57 -0500 Subject: syscalls: Fix file comments for syscalls implemented in kernel/sys.c The relevant syscalls were previously moved from kernel/timer.c to kernel/sys.c, but the comments weren't updated to reflect this change. Fixing these comments messes up the alphabetical ordering of syscalls by filename. This could be fixed by merging the two groups of kernel/sys.c syscalls, but that would require reordering the syscalls and renumbering them to maintain the numerical order in unistd.h. Signed-off-by: Tal Zussman Link: https://lore.kernel.org/r/20201112215657.GA4539@charmander' Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..629870fbb2c9 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -744,7 +744,7 @@ asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv, asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p); -/* kernel/timer.c */ +/* kernel/sys.c */ asmlinkage long sys_getpid(void); asmlinkage long sys_getppid(void); asmlinkage long sys_getuid(void); -- cgit v1.2.3 From a609c58086e381c13bdad1ba97e6510a13d465e7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 12 Nov 2020 10:58:55 +0000 Subject: tty: serial: 8250: 8250_port: Move prototypes to shared location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/tty/serial/8250/8250_port.c:349:14: warning: no previous prototype for ‘au_serial_in’ [-Wmissing-prototypes] drivers/tty/serial/8250/8250_port.c:359:6: warning: no previous prototype for ‘au_serial_out’ [-Wmissing-prototypes] Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Mike Hudson Cc: linux-serial@vger.kernel.org Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20201112105857.2078977-3-lee.jones@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 2b70f736b091..9e655055112d 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -187,4 +187,9 @@ extern void serial8250_set_isa_configurator(void (*v) (int port, struct uart_port *up, u32 *capabilities)); +#ifdef CONFIG_SERIAL_8250_RT288X +unsigned int au_serial_in(struct uart_port *p, int offset); +void au_serial_out(struct uart_port *p, int offset, int value); +#endif + #endif -- cgit v1.2.3 From 423f16108c9d832bd96059d5c882c8ef6d76eb96 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 13 Nov 2020 00:59:29 +0000 Subject: bpf: Augment the set of sleepable LSM hooks Update the set of sleepable hooks with the ones that do not trigger a warning with might_fault() when exercised with the correct kernel config options enabled, i.e. DEBUG_ATOMIC_SLEEP=y LOCKDEP=y PROVE_LOCKING=y This means that a sleepable LSM eBPF program can be attached to these LSM hooks. A new helper method bpf_lsm_is_sleepable_hook is added and the set is maintained locally in bpf_lsm.c Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201113005930.541956-2-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 73226181b744..0d1c33ace398 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -27,6 +27,8 @@ extern struct lsm_blob_sizes bpf_lsm_blob_sizes; int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); +bool bpf_lsm_is_sleepable_hook(u32 btf_id); + static inline struct bpf_storage_blob *bpf_inode( const struct inode *inode) { @@ -54,6 +56,11 @@ void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ +static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) +{ + return false; +} + static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { -- cgit v1.2.3 From d19ad0775dcd64b49eecf4fa79c17959ebfbd26b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:42:17 -0400 Subject: ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs In preparation to have arguments of a function passed to callbacks attached to functions as default, change the default callback prototype to receive a struct ftrace_regs as the forth parameter instead of a pt_regs. For callbacks that set the FL_SAVE_REGS flag in their ftrace_ops flags, they will now need to get the pt_regs via a ftrace_get_regs() helper call. If this is called by a callback that their ftrace_ops did not have a FL_SAVE_REGS flag set, it that helper function will return NULL. This will allow the ftrace_regs to hold enough just to get the parameters and stack pointer, but without the worry that callbacks may have a pt_regs that is not completely filled. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 16 ++++++++++++++-- include/linux/kprobes.h | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8dde9c17aaa5..24e1fa52337d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -90,8 +90,20 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) +{ + if (!fregs) + return NULL; + + return &fregs->regs; +} + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); @@ -259,7 +271,7 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 629abaf25681..be73350955e4 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -345,7 +345,7 @@ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs); + struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif -- cgit v1.2.3 From 02a474ca266a47ea8f4d5a11f4ffa120f83730ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 27 Oct 2020 10:55:55 -0400 Subject: ftrace/x86: Allow for arguments to be passed in to ftrace_regs by default Currently, the only way to get access to the registers of a function via a ftrace callback is to set the "FL_SAVE_REGS" bit in the ftrace_ops. But as this saves all regs as if a breakpoint were to trigger (for use with kprobes), it is expensive. The regs are already saved on the stack for the default ftrace callbacks, as that is required otherwise a function being traced will get the wrong arguments and possibly crash. And on x86, the arguments are already stored where they would be on a pt_regs structure to use that code for both the regs version of a callback, it makes sense to pass that information always to all functions. If an architecture does this (as x86_64 now does), it is to set HAVE_DYNAMIC_FTRACE_WITH_ARGS, and this will let the generic code that it could have access to arguments without having to set the flags. This also includes having the stack pointer being saved, which could be used for accessing arguments on the stack, as well as having the function graph tracer not require its own trampoline! Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 24e1fa52337d..588ea7023a7a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -90,16 +90,21 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + struct ftrace_regs { struct pt_regs regs; }; +#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) { if (!fregs) return NULL; - return &fregs->regs; + return arch_ftrace_get_regs(fregs); } typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, -- cgit v1.2.3 From 2860cd8a235375df3c8ec8039d9fe5eb2f658b86 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:15:27 -0400 Subject: livepatch: Use the default ftrace_ops instead of REGS when ARGS is available When CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS is available, the ftrace call will be able to set the ip of the calling function. This will improve the performance of live kernel patching where it does not need all the regs to be stored just to change the instruction pointer. If all archs that support live kernel patching also support HAVE_DYNAMIC_FTRACE_WITH_ARGS, then the architecture specific function klp_arch_set_pc() could be made generic. It is possible that an arch can support HAVE_DYNAMIC_FTRACE_WITH_ARGS but not HAVE_DYNAMIC_FTRACE_WITH_REGS and then have access to live patching. Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: live-patching@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Acked-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 588ea7023a7a..9a8ce28e4485 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -97,6 +97,13 @@ struct ftrace_regs { }; #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) +/* + * ftrace_instruction_pointer_set() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs + * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports + * live kernel patching. + */ +#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) -- cgit v1.2.3 From e7018751d2e603381ae6028ba4dd21ec45ce38bb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 13 Nov 2020 14:12:31 -0300 Subject: usb: host: ehci-mxc: Remove the driver The ehci-mxc driver was only used by i.MX non-DT platforms. Since 5.10-rc1, i.MX has been converted to a DT-only platform and all board files are gone. Remove the ehci-mxc driver as there are no more users at all. Acked-by: Alan Stern Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20201113171231.2205-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/usb-ehci-mxc.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/platform_data/usb-ehci-mxc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-ehci-mxc.h b/include/linux/platform_data/usb-ehci-mxc.h deleted file mode 100644 index ad9794d09bc8..000000000000 --- a/include/linux/platform_data/usb-ehci-mxc.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __INCLUDE_ASM_ARCH_MXC_EHCI_H -#define __INCLUDE_ASM_ARCH_MXC_EHCI_H - -struct mxc_usbh_platform_data { - int (*init)(struct platform_device *pdev); - int (*exit)(struct platform_device *pdev); - - unsigned int portsc; - struct usb_phy *otg; -}; - -#endif /* __INCLUDE_ASM_ARCH_MXC_EHCI_H */ - -- cgit v1.2.3 From 56c62080d5b57dac2c2cdd4a83571450e38ca763 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 13 Nov 2020 22:27:04 +0100 Subject: usb: hcd.h: Remove RUN_CONTEXT The last user of RUN_CONTEXT was removed in commit 97c17beb3b668 ("[PATCH] ehci-hcd (1/2): portability (2.4), tasklet,") in the history.git repo. There are no users of RUN_CONTEXT, remove it. Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20201113212704.2243807-1-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 3dbb42c637c1..96281cd50ff6 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -734,10 +734,6 @@ static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb, /* random stuff */ -#define RUN_CONTEXT (in_irq() ? "in_irq" \ - : (in_interrupt() ? "in_interrupt" : "can sleep")) - - /* This rwsem is for use only by the hub driver and ehci-hcd. * Nobody else should touch it. */ -- cgit v1.2.3 From 8dedcc3eee3aceb37832176f0a1b03d5687acda3 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 24 Sep 2020 11:41:55 +0300 Subject: iio: core: centralize ioctl() calls to the main chardev The aim of this is to improve a bit the organization of ioctl() calls in IIO core. Currently the chardev is split across IIO core sub-modules/files. The main chardev has to be able to handle ioctl() calls, and if we need to add buffer ioctl() calls, this would complicate things. The 'industrialio-core.c' file will provide a 'iio_device_ioctl()' which will iterate over a list of ioctls registered with the IIO device. These can be event ioctl() or buffer ioctl() calls, or something else. Each ioctl() handler will have to return a IIO_IOCTL_UNHANDLED code (which is positive 1), if the ioctl() did not handle the call in any. This eliminates any potential ambiguities about negative error codes, which should fail the call altogether. If any ioctl() returns 0, it was considered that it was serviced successfully and the loop will exit. This change also moves the handling of the IIO_GET_EVENT_FD_IOCTL command inside 'industrialio-event.c', where this is better suited. This patch is a combination of 2 other patches from an older series: Patch 1: iio: core: add simple centralized mechanism for ioctl() handlers Link: https://lore.kernel.org/linux-iio/20200427131100.50845-6-alexandru.ardelean@analog.com/ Patch 2: iio: core: use new common ioctl() mechanism Link: https://lore.kernel.org/linux-iio/20200427131100.50845-7-alexandru.ardelean@analog.com/ Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20200924084155.99406-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index f2e94196d31f..07c5a8e52ca8 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -11,6 +11,7 @@ * @channel_attr_list: keep track of automatically created channel * attributes * @chan_attr_group: group for all attrs in base directory + * @ioctl_handlers: ioctl handlers registered with the core handler * @debugfs_dentry: device specific debugfs dentry * @cached_reg_addr: cached register address for debugfs reads * @read_buf: read buffer to be used for the initial reg read @@ -22,6 +23,7 @@ struct iio_dev_opaque { struct list_head buffer_list; struct list_head channel_attr_list; struct attribute_group chan_attr_group; + struct list_head ioctl_handlers; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; unsigned cached_reg_addr; -- cgit v1.2.3 From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 13 Nov 2020 22:51:59 -0800 Subject: compiler.h: fix barrier_data() on clang Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") neglected to copy barrier_data() from compiler-gcc.h into compiler-clang.h. The definition in compiler-gcc.h was really to work around clang's more aggressive optimization, so this broke barrier_data() on clang, and consequently memzero_explicit() as well. For example, this results in at least the memzero_explicit() call in lib/crypto/sha256.c:sha256_transform() being optimized away by clang. Fix this by moving the definition of barrier_data() into compiler.h. Also move the gcc/clang definition of barrier() into compiler.h, __memory_barrier() is icc-specific (and barrier() is already defined using it in compiler-intel.h) and doesn't belong in compiler.h. [rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled] Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arvind Sankar Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 6 ------ include/linux/compiler-gcc.h | 19 ------------------- include/linux/compiler.h | 18 ++++++++++++++++-- 3 files changed, 16 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 230604e7f057..dd7233c48bf3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -60,12 +60,6 @@ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like ICC. - */ -#define barrier() __asm__ __volatile__("" : : : "memory") - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5deb37024574..74c6c0486eed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,25 +15,6 @@ # error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -/* - * This version is i.e. to prevent dead stores elimination on @ptr - * where gcc and llvm may behave differently when otherwise using - * normal barrier(): while gcc behavior gets along with a normal - * barrier(), llvm needs an explicit input variable to be assumed - * clobbered. The issue is as follows: while the inline asm might - * access any memory it wants, the compiler could have fit all of - * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proved that the inline asm wasn't touching any of - * it. This version works well with both compilers, i.e. we're telling - * the compiler that the inline asm absolutely may see the contents - * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 - */ -#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e512f5505dad..b8fe0c23cfff 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Optimization barrier */ #ifndef barrier -# define barrier() __memory_barrier() +/* The "volatile" is due to gcc bugs */ +# define barrier() __asm__ __volatile__("": : :"memory") #endif #ifndef barrier_data -# define barrier_data(ptr) barrier() +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proved that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") #endif /* workaround for GCC PR82365 if needed */ -- cgit v1.2.3 From 8b21ca0218d29cc6bb7028125c7e5a10dfb4730c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 13 Nov 2020 22:52:13 -0800 Subject: mm: memcontrol: fix missing wakeup polling thread When we poll the swap.events, we can miss being woken up when the swap event occurs. Because we didn't notify. Fixes: f3a53a3a1e5b ("mm, memcontrol: implement memory.swap.events") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Roman Gushchin Cc: Michal Hocko Cc: Yafang Shao Cc: Chris Down Cc: Tejun Heo Link: https://lkml.kernel.org/r/20201105161936.98312-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..a80c59af2c60 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,12 +900,19 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + atomic_long_inc(&memcg->memory_events_local[event]); - cgroup_file_notify(&memcg->events_local_file); + if (!swap_event) + cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - cgroup_file_notify(&memcg->events_file); + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; -- cgit v1.2.3 From 30d6f8c15d2cd877c1f3d47d8a1064649ebe58e2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:21:46 +0200 Subject: clk: add api to get clk consumer from clk_hw clk_register() is deprecated. Using 'clk' member of struct clk_hw is discouraged. With this constraint, it is difficult for driver to register clocks using the clk_hw API and then use the clock with the consumer API This adds a simple helper, clk_hw_get_clk(), to get a struct clk from a struct clk_hw. Like other clk_get() variant, each call to this helper must be balanced with a call to clk_put(). To make life easier on the consumers, a memory managed version is provided as well. Cc: Martin Blumenstingl Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021162147.563655-3-jbrunet@baylibre.com Tested-by: Kevin Hilman [sboyd@kernel.org: Fix kernel-doc] Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 03a5de5f99f4..86b707520ec0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1088,6 +1088,11 @@ static inline struct clk_hw *__clk_get_hw(struct clk *clk) return (struct clk_hw *)clk; } #endif + +struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id); +struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw, + const char *con_id); + unsigned int clk_hw_get_num_parents(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent_by_index(const struct clk_hw *hw, -- cgit v1.2.3 From 6d30d50d037dfa092f9d5d1fffa348ab4abb7163 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:38:46 +0200 Subject: clk: add devm variant of clk_notifier_register Add a memory managed variant of clk_notifier_register() to make life easier on clock consumers using notifiers Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021163847.595189-2-jbrunet@baylibre.com Signed-off-by: Stephen Boyd --- include/linux/clk.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4..f53afdf8198b 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -109,6 +109,16 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb); */ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); +/** + * devm_clk_notifier_register - register a managed rate-change notifier callback + * @dev: device for clock "consumer" + * @clk: clock whose rate we are interested in + * @nb: notifier block with callback function pointer + * + * Returns 0 on success, -EERROR otherwise + */ +int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); + /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) * for a clock source. -- cgit v1.2.3 From e6fb7aee486c7fbd4d94f4894feaa6f0424c1740 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:38:47 +0200 Subject: clk: meson: g12: use devm variant to register notifiers Until now, nothing was done to unregister the dvfs clock notifiers of the Amlogic g12 SoC family. This is not great but this driver was not really expected to be unloaded. With the ongoing effort to build everything as module for this platform, this needs to be cleanly handled. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021163847.595189-3-jbrunet@baylibre.com Signed-off-by: Stephen Boyd --- include/linux/clk.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index f53afdf8198b..4ac766dc3daf 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -117,7 +117,8 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); * * Returns 0 on success, -EERROR otherwise */ -int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); +int devm_clk_notifier_register(struct device *dev, struct clk *clk, + struct notifier_block *nb); /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) @@ -196,6 +197,13 @@ static inline int clk_notifier_unregister(struct clk *clk, return -ENOTSUPP; } +static inline int devm_clk_notifier_register(struct device *dev, + struct clk *clk, + struct notifier_block *nb) +{ + return -ENOTSUPP; +} + static inline long clk_get_accuracy(struct clk *clk) { return -ENOTSUPP; -- cgit v1.2.3 From f296dcd629aa412a80a53215e46087f53af87f08 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Nov 2020 22:01:45 +0100 Subject: genirq: Remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ Commit bb9d812643d8 ("arch: remove tile port") removed the last user of this cruft two years ago... Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87eekvac06.fsf@nanos.tec.linutronix.de --- include/linux/irq.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c54365309e97..79ce314a603b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -954,21 +954,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ -unsigned int irq_alloc_hwirqs(int cnt, int node); -static inline unsigned int irq_alloc_hwirq(int node) -{ - return irq_alloc_hwirqs(1, node); -} -void irq_free_hwirqs(unsigned int from, int cnt); -static inline void irq_free_hwirq(unsigned int irq) -{ - return irq_free_hwirqs(irq, 1); -} -int arch_setup_hwirq(unsigned int irq, int node); -void arch_teardown_hwirq(unsigned int irq); -#endif - #ifdef CONFIG_GENERIC_IRQ_LEGACY void irq_init_desc(unsigned int irq); #endif -- cgit v1.2.3 From e906a546bd8653ed2e7a14cb300fd17952d7f862 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Nov 2020 23:36:28 +0100 Subject: genirq/irqdomain: Make irq_domain_disassociate() static No users outside of the core code. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87a6vja7mb.fsf@nanos.tec.linutronix.de --- include/linux/irqdomain.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 77bf7d84c673..5701a8b01726 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -387,8 +387,6 @@ extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, extern void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -extern void irq_domain_disassociate(struct irq_domain *domain, - unsigned int irq); extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); -- cgit v1.2.3 From c4d51a52c67a1e3a0fa3006e5ec21cdc07649cd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 14:39:43 +0000 Subject: sched/wait: Add add_wait_queue_priority() This allows an exclusive wait_queue_entry to be added at the head of the queue, instead of the tail as normal. Thus, it gets to consume events first without allowing non-exclusive waiters to be woken at all. The (first) intended use is for KVM IRQFD, which currently has inconsistent behaviour depending on whether posted interrupts are available or not. If they are, KVM will bypass the eventfd completely and deliver interrupts directly to the appropriate vCPU. If not, events are delivered through the eventfd and userspace will receive them when polling on the eventfd. By using add_wait_queue_priority(), KVM will be able to consistently consume events within the kernel without accidentally exposing them to userspace when they're supposed to be bypassed. This, in turn, means that userspace doesn't have to jump through hoops to avoid listening on the erroneously noisy eventfd and injecting duplicate interrupts. Signed-off-by: David Woodhouse Message-Id: <20201027143944.648769-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/wait.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 27fb99cfeb02..fe10e8570a52 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -22,6 +22,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_BOOKMARK 0x04 #define WQ_FLAG_CUSTOM 0x08 #define WQ_FLAG_DONE 0x10 +#define WQ_FLAG_PRIORITY 0x20 /* * A single wait-queue entry structure: @@ -164,11 +165,20 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->entry, &wq_head->head); + struct list_head *head = &wq_head->head; + struct wait_queue_entry *wq; + + list_for_each_entry(wq, &wq_head->head, entry) { + if (!(wq->flags & WQ_FLAG_PRIORITY)) + break; + head = &wq->entry; + } + list_add(&wq_entry->entry, head); } /* -- cgit v1.2.3 From 28f1326710555bbe666f64452d08f2d7dd657cae Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 13:55:21 +0000 Subject: eventfd: Export eventfd_ctx_do_read() Where events are consumed in the kernel, for example by KVM's irqfd_wakeup() and VFIO's virqfd_wakeup(), they currently lack a mechanism to drain the eventfd's counter. Since the wait queue is already locked while the wakeup functions are invoked, all they really need to do is call eventfd_ctx_do_read(). Add a check for the lock, and export it for them. Signed-off-by: David Woodhouse Message-Id: <20201027135523.646811-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/eventfd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index dc4fd8a6644d..fa0a524baed0 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -41,6 +41,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); DECLARE_PER_CPU(int, eventfd_wake_count); @@ -82,6 +83,11 @@ static inline bool eventfd_signal_count(void) return false; } +static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + +} + #endif #endif /* _LINUX_EVENTFD_H */ -- cgit v1.2.3 From 2f5414423ef577e9e8bdb227f32d0abdd34e4274 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Nov 2020 05:25:09 -0500 Subject: KVM: remove kvm_clear_guest_page kvm_clear_guest_page is not used anymore after "KVM: X86: Don't track dirty for KVM_SET_[TSS_ADDR|IDENTITY_MAP_ADDR]", except from kvm_clear_guest. We can just inline it in its sole user. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7f2e2a09ebbd..66a4324f329d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -792,7 +792,6 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, offset_in_page(__gpa), v); \ }) -int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From 28bd726aa404c0da8fd6852fe69bb4538a103b71 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:20:34 -0400 Subject: KVM: Pass in kvm pointer into mark_page_dirty_in_slot() The context will be needed to implement the kvm dirty ring. Signed-off-by: Peter Xu Message-Id: <20201001012044.5151-5-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 66a4324f329d..ca7c1459a8e3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -797,7 +797,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From fb04a1eddb1a65b6588a021bdc132270d5ae48bb Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:22 -0400 Subject: KVM: X86: Implement ring-based dirty memory tracking This patch is heavily based on previous work from Lei Cao and Paolo Bonzini . [1] KVM currently uses large bitmaps to track dirty memory. These bitmaps are copied to userspace when userspace queries KVM for its dirty page information. The use of bitmaps is mostly sufficient for live migration, as large parts of memory are be dirtied from one log-dirty pass to another. However, in a checkpointing system, the number of dirty pages is small and in fact it is often bounded---the VM is paused when it has dirtied a pre-defined number of pages. Traversing a large, sparsely populated bitmap to find set bits is time-consuming, as is copying the bitmap to user-space. A similar issue will be there for live migration when the guest memory is huge while the page dirty procedure is trivial. In that case for each dirty sync we need to pull the whole dirty bitmap to userspace and analyse every bit even if it's mostly zeros. The preferred data structure for above scenarios is a dense list of guest frame numbers (GFN). This patch series stores the dirty list in kernel memory that can be memory mapped into userspace to allow speedy harvesting. This patch enables dirty ring for X86 only. However it should be easily extended to other archs as well. [1] https://patchwork.kernel.org/patch/10471409/ Signed-off-by: Lei Cao Signed-off-by: Paolo Bonzini Signed-off-by: Peter Xu Message-Id: <20201001012222.5767-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_dirty_ring.h | 103 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 13 ++++++ 2 files changed, 116 insertions(+) create mode 100644 include/linux/kvm_dirty_ring.h (limited to 'include/linux') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h new file mode 100644 index 000000000000..120e5e90fa1d --- /dev/null +++ b/include/linux/kvm_dirty_ring.h @@ -0,0 +1,103 @@ +#ifndef KVM_DIRTY_RING_H +#define KVM_DIRTY_RING_H + +#include + +/** + * kvm_dirty_ring: KVM internal dirty ring structure + * + * @dirty_index: free running counter that points to the next slot in + * dirty_ring->dirty_gfns, where a new dirty page should go + * @reset_index: free running counter that points to the next dirty page + * in dirty_ring->dirty_gfns for which dirty trap needs to + * be reenabled + * @size: size of the compact list, dirty_ring->dirty_gfns + * @soft_limit: when the number of dirty pages in the list reaches this + * limit, vcpu that owns this ring should exit to userspace + * to allow userspace to harvest all the dirty pages + * @dirty_gfns: the array to keep the dirty gfns + * @index: index of this dirty ring + */ +struct kvm_dirty_ring { + u32 dirty_index; + u32 reset_index; + u32 size; + u32 soft_limit; + struct kvm_dirty_gfn *dirty_gfns; + int index; +}; + +#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +/* + * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * not be included as well, so define these nop functions for the arch. + */ +static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return 0; +} + +static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, + int index, u32 size) +{ + return 0; +} + +static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + return NULL; +} + +static inline int kvm_dirty_ring_reset(struct kvm *kvm, + struct kvm_dirty_ring *ring) +{ + return 0; +} + +static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, + u32 slot, u64 offset) +{ +} + +static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, + u32 offset) +{ + return NULL; +} + +static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ +} + +static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return true; +} + +#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +u32 kvm_dirty_ring_get_rsvd_entries(void); +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); + +/* + * returns =0: successfully pushed + * <0: unable to push, need to wait + */ +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); + +/* for use in vm_operations_struct */ +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); + +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +#endif /* KVM_DIRTY_RING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca7c1459a8e3..864b156391c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #include #include +#include #ifndef KVM_MAX_VCPU_ID #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS @@ -319,6 +320,7 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; + struct kvm_dirty_ring dirty_ring; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -505,6 +507,7 @@ struct kvm { struct srcu_struct irq_srcu; pid_t userspace_pid; unsigned int max_halt_poll_ns; + u32 dirty_ring_size; }; #define kvm_err(fmt, ...) \ @@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +/* + * This defines how many reserved entries we want to keep before we + * kick the vcpu to the userspace to avoid dirty ring full. This + * value can be tuned to higher if e.g. PML is enabled on the host. + */ +#define KVM_DIRTY_RING_RSVD_ENTRIES 64 + +/* Max number of entries allowed for each kvm dirty ring */ +#define KVM_DIRTY_RING_MAX_ENTRIES 65536 + #endif -- cgit v1.2.3 From 044c59c409b7fd753707dc437890e94d2b0bd819 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:26 -0400 Subject: KVM: Don't allocate dirty bitmap if dirty ring is enabled Because kvm dirty rings and kvm dirty log is used in an exclusive way, Let's avoid creating the dirty_bitmap when kvm dirty ring is enabled. At the meantime, since the dirty_bitmap will be conditionally created now, we can't use it as a sign of "whether this memory slot enabled dirty tracking". Change users like that to check against the kvm memory slot flags. Note that there still can be chances where the kvm memory slot got its dirty_bitmap allocated, _if_ the memory slots are created before enabling of the dirty rings and at the same time with the dirty tracking capability enabled, they'll still with the dirty_bitmap. However it should not hurt much (e.g., the bitmaps will always be freed if they are there), and the real users normally won't trigger this because dirty bit tracking flag should in most cases only be applied to kvm slots only before migration starts, that should be far latter than kvm initializes (VM starts). Signed-off-by: Peter Xu Message-Id: <20201001012226.5868-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 864b156391c8..f3b1013fb22c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -351,6 +351,11 @@ struct kvm_memory_slot { u16 as_id; }; +static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; +} + static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; -- cgit v1.2.3 From 78a56e0494ad29feccd4c54c2b5682721f8cb988 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 4 Nov 2020 15:01:57 -0800 Subject: entry: Fix spelling/typo errors in irq entry code s/reguired/required/ s/Interupts/Interrupts/ s/quiescient/quiescent/ s/assemenbly/assembly/ Signed-off-by: Ira Weiny Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201104230157.3378023-1-ira.weiny@intel.com --- include/linux/entry-common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 1a128baf3628..aab549026ab8 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -415,7 +415,7 @@ void irqentry_exit_cond_resched(void); * @state: Return value from matching call to irqentry_enter() * * Depending on the return target (kernel/user) this runs the necessary - * preemption and work checks if possible and reguired and returns to + * preemption and work checks if possible and required and returns to * the caller with interrupts disabled and no further work pending. * * This is the last action before returning to the low level ASM code which @@ -438,7 +438,7 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); * @regs: Pointer to pt_regs (NMI entry regs) * @irq_state: Return value from matching call to irqentry_nmi_enter() * - * Last action before returning to the low level assmenbly code. + * Last action before returning to the low level assembly code. * * Counterpart to irqentry_nmi_enter(). */ -- cgit v1.2.3 From 23d89aa0c2192f2d4582198b381d8805492c7925 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 11 Nov 2020 13:11:18 +0200 Subject: firmware: imx-dsp: Export functions to request/free channels In order to save power, we only need to request a channel when the communication with the DSP active. For this we export the following functions: - imx_dsp_request_channel, gets a channel with a given index - imx_dsp_free_channel, frees a channel with a given index Notice that we still request channels at probe to support devices that do not have PM callbacks implemented. More explanations about why requesting a channel has an effect on power savings: - requesting an mailbox channel will call mailbox's startup function. - startup function calls pm_runtime_get_sync which increments device usage count and will keep the device active. Specifically, mailbox clock will be always ON when a mailbox channel is requested. Signed-off-by: Daniel Baluta Reviewed-by: Paul Olaru Signed-off-by: Shawn Guo --- include/linux/firmware/imx/dsp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/dsp.h b/include/linux/firmware/imx/dsp.h index 7562099c9e46..4f7895a3b73c 100644 --- a/include/linux/firmware/imx/dsp.h +++ b/include/linux/firmware/imx/dsp.h @@ -55,6 +55,9 @@ static inline void *imx_dsp_get_data(struct imx_dsp_ipc *ipc) int imx_dsp_ring_doorbell(struct imx_dsp_ipc *dsp, unsigned int chan_idx); +struct mbox_chan *imx_dsp_request_channel(struct imx_dsp_ipc *ipc, int idx); +void imx_dsp_free_channel(struct imx_dsp_ipc *ipc, int idx); + #else static inline int imx_dsp_ring_doorbell(struct imx_dsp_ipc *ipc, @@ -63,5 +66,12 @@ static inline int imx_dsp_ring_doorbell(struct imx_dsp_ipc *ipc, return -ENOTSUPP; } +struct mbox_chan *imx_dsp_request_channel(struct imx_dsp_ipc *ipc, int idx) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +void imx_dsp_free_channel(struct imx_dsp_ipc *ipc, int idx) { } + #endif #endif /* _IMX_DSP_IPC_H */ -- cgit v1.2.3 From cfeeea60af2f01c13b94d57a9bb1291e7bc181da Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 16 Nov 2020 12:57:13 +0200 Subject: bus: ti-sysc: Implement GPMC debug quirk to drop platform data We need to enable no-reset-on-init quirk for GPMC if the config option for CONFIG_OMAP_GPMC_DEBUG is set. Otherwise the GPMC driver code is unable to show the bootloader configured timings. Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 240dce553a0b..fafc1beea504 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_GPMC_DEBUG BIT(26) #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) -- cgit v1.2.3 From ead49373d2916080509f51fc6a4ee8f9bc021b9b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 29 Oct 2020 09:57:16 +0800 Subject: mmc: core: Initial support for SD express card/host In the SD specification v7.10 the SD express card has been added. This new type of removable SD card, can be managed via a PCIe/NVMe based interface, while also allowing backwards compatibility towards the legacy SD interface. To keep the backwards compatibility, it's required to start the initialization through the legacy SD interface. If it turns out that the mmc host and the SD card, both supports the PCIe/NVMe interface, then a switch should be allowed. Therefore, let's introduce some basic support for this type of SD cards to the mmc core. The mmc host, should set MMC_CAP2_SD_EXP if it supports this interface and MMC_CAP2_SD_EXP_1_2V, if also 1.2V is supported, as to inform the core about it. To deal with the switch to the PCIe/NVMe interface, the mmc host is required to implement a new host ops, ->init_sd_express(). Based on the initial communication between the host and the card, host->ios.timing is set to either MMC_TIMING_SD_EXP or MMC_TIMING_SD_EXP_1_2V, depending on if 1.2V is supported or not. In this way, the mmc host can check these values in its ->init_sd_express() ops, to know how to proceed with the handover. Note that, to manage card insert/removal, the mmc core sticks with using the ->get_cd() callback, which means it's the host's responsibility to make sure it provides valid data, even if the card may be managed by PCIe/NVMe at the moment. As long as the card seems to be present, the mmc core keeps the card powered on. Cc: Greg Kroah-Hartman Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Rui Feng Signed-off-by: Ulf Hansson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1603936636-3126-1-git-send-email-rui_feng@realsil.com.cn --- include/linux/mmc/host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c079b932330f..01bba36545c5 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -60,6 +60,8 @@ struct mmc_ios { #define MMC_TIMING_MMC_DDR52 8 #define MMC_TIMING_MMC_HS200 9 #define MMC_TIMING_MMC_HS400 10 +#define MMC_TIMING_SD_EXP 11 +#define MMC_TIMING_SD_EXP_1_2V 12 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -173,6 +175,9 @@ struct mmc_host_ops { */ int (*multi_io_quirk)(struct mmc_card *card, unsigned int direction, int blk_size); + + /* Initialize an SD express card, mandatory for MMC_CAP2_SD_EXP. */ + int (*init_sd_express)(struct mmc_host *host, struct mmc_ios *ios); }; struct mmc_cqe_ops { @@ -358,6 +363,8 @@ struct mmc_host { #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ MMC_CAP2_HS200_1_2V_SDR) +#define MMC_CAP2_SD_EXP (1 << 7) /* SD express via PCIe */ +#define MMC_CAP2_SD_EXP_1_2V (1 << 8) /* SD express 1.2V */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ -- cgit v1.2.3 From 5afe802132f242f5520d2acac09ea05d31e3c7cf Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Thu, 29 Oct 2020 09:57:48 +0800 Subject: misc: rtsx: Add SD Express mode support for RTS5261 RTS5261 support SD mode and PCIe/NVMe mode. The workflow is as follows. 1.RTS5261 work in SD mode and set MMC_CAPS2_SD_EXP flag. 2.If card is plugged in, Host send CMD8 to ask card's PCIe availability. 3.If the card has PCIe availability and WP is not set, init_sd_express() will be invoked, RTS5261 switch to PCIe/NVMe mode. 4.Mmc driver handover it to NVMe driver. 5.If card is unplugged, RTS5261 will switch to SD mode. Signed-off-by: Rui Feng Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1603936668-3363-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Ulf Hansson --- include/linux/rtsx_pci.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 745f5e73f99a..b47959f48ccd 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -658,6 +658,19 @@ #define PM_WAKE_EN 0x01 #define PM_CTRL4 0xFF47 +#define RTS5261_FW_CFG0 0xFF54 +#define RTS5261_FW_ENTER_EXPRESS (0x01 << 0) + +#define RTS5261_FW_CFG1 0xFF55 +#define RTS5261_SYS_CLK_SEL_MCU_CLK (0x01 << 7) +#define RTS5261_CRC_CLK_SEL_MCU_CLK (0x01 << 6) +#define RTS5261_FAKE_MCU_CLOCK_GATING (0x01 << 5) +#define RTS5261_MCU_BUS_SEL_MASK (0x01 << 4) +#define RTS5261_MCU_CLOCK_SEL_MASK (0x03 << 2) +#define RTS5261_MCU_CLOCK_SEL_16M (0x01 << 2) +#define RTS5261_MCU_CLOCK_GATING (0x01 << 1) +#define RTS5261_DRIVER_ENABLE_FW (0x01 << 0) + #define REG_CFG_OOBS_OFF_TIMER 0xFEA6 #define REG_CFG_OOBS_ON_TIMER 0xFEA7 #define REG_CFG_VCM_ON_TIMER 0xFEA8 @@ -701,6 +714,13 @@ #define RTS5260_DVCC_TUNE_MASK 0x70 #define RTS5260_DVCC_33 0x70 +/*RTS5261*/ +#define RTS5261_LDO1_CFG0 0xFF72 +#define RTS5261_LDO1_OCP_THD_MASK (0x07 << 5) +#define RTS5261_LDO1_OCP_EN (0x01 << 4) +#define RTS5261_LDO1_OCP_LMT_THD_MASK (0x03 << 2) +#define RTS5261_LDO1_OCP_LMT_EN (0x01 << 1) + #define LDO_VCC_CFG1 0xFF73 #define LDO_VCC_REF_TUNE_MASK 0x30 #define LDO_VCC_REF_1V2 0x20 @@ -741,6 +761,8 @@ #define RTS5260_AUTOLOAD_CFG4 0xFF7F #define RTS5260_MIMO_DISABLE 0x8A +/*RTS5261*/ +#define RTS5261_AUX_CLK_16M_EN (1 << 5) #define RTS5260_REG_GPIO_CTL0 0xFC1A #define RTS5260_REG_GPIO_MASK 0x01 @@ -1191,6 +1213,7 @@ struct rtsx_pcr { #define EXTRA_CAPS_MMC_HS200 (1 << 4) #define EXTRA_CAPS_MMC_8BIT (1 << 5) #define EXTRA_CAPS_NO_MMC (1 << 7) +#define EXTRA_CAPS_SD_EXPRESS (1 << 8) u32 extra_caps; #define IC_VER_A 0 -- cgit v1.2.3 From 6b7b58f425c3359787483479d73c0bb98ffc65b8 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Tue, 3 Nov 2020 17:54:29 +0800 Subject: mmc: rtsx: Add test mode for RTS5261 This patch add test mode for RTS5261. If test mode is set, reader will switch to SD Express mode mandatorily, and this mode is used by factory testing only. Signed-off-by: Rui Feng Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1604397269-2780-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Ulf Hansson --- include/linux/rtsx_pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index b47959f48ccd..db249e8707f3 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -658,6 +658,10 @@ #define PM_WAKE_EN 0x01 #define PM_CTRL4 0xFF47 +/* FW config info register */ +#define RTS5261_FW_CFG_INFO0 0xFF50 +#define RTS5261_FW_EXPRESS_TEST_MASK (0x01 << 0) +#define RTS5261_FW_EA_MODE_MASK (0x01 << 5) #define RTS5261_FW_CFG0 0xFF54 #define RTS5261_FW_ENTER_EXPRESS (0x01 << 0) -- cgit v1.2.3 From 1672617d512880f31d1d43ca0eb0d13d50b8c680 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Tue, 3 Nov 2020 17:55:12 +0800 Subject: misc: rtsx: Add hardware auto power off for RTS5261 This patch enable hardware auto power off when card is removed. Signed-off-by: Rui Feng Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1604397312-2991-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Ulf Hansson --- include/linux/rtsx_pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index db249e8707f3..fcaadc7c9df1 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -82,6 +82,7 @@ #define MS_OC_INT_EN (1 << 23) #define SD_OC_INT_EN (1 << 22) +#define RTSX_DUM_REG 0x1C /* * macros for easy use @@ -1272,6 +1273,8 @@ struct rtsx_pcr { #define PCI_PID(pcr) ((pcr)->pci->device) #define is_version(pcr, pid, ver) \ (CHK_PCI_PID(pcr, pid) && (pcr)->ic_version == (ver)) +#define is_version_higher_than(pcr, pid, ver) \ + (CHK_PCI_PID(pcr, pid) && (pcr)->ic_version > (ver)) #define pcr_dbg(pcr, fmt, arg...) \ dev_dbg(&(pcr)->pci->dev, fmt, ##arg) -- cgit v1.2.3 From 13daf48978280ea8bce38f1e0598b913b09f5395 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Nov 2020 22:53:16 +0200 Subject: gpiolib: Replace unsigned by unsigned int Replace unsigned by unsigned int in GPIO library code. Note, legacy API left untouched. Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Reviewed-by: Hans de Goede Reviewed-by: Mika Westerberg --- include/linux/gpio/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 901aab89d025..ef49307611d2 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -158,7 +158,7 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap); int gpiod_set_config(struct gpio_desc *desc, unsigned long config); -int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce); +int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); @@ -481,7 +481,7 @@ static inline int gpiod_set_config(struct gpio_desc *desc, unsigned long config) return -ENOSYS; } -static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) +static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce) { /* GPIO can never have been requested */ WARN_ON(desc); -- cgit v1.2.3 From cc947f2b9c04113d84eeef67cc7c6326e1982019 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2020 10:53:38 +0100 Subject: timers: Make run_local_timers() static No users outside of the timer code. Move the caller below this function to avoid a pointless forward declaration. Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index d10bc7e73b41..fda13c9d1256 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -193,7 +193,6 @@ extern int try_to_del_timer_sync(struct timer_list *timer); #define del_singleshot_timer_sync(t) del_timer_sync(t) extern void init_timers(void); -extern void run_local_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); -- cgit v1.2.3 From 66981c37b3199d293c58f84cf2366e86a06e1a3d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:14 +0100 Subject: hrtimer: Fix kernel-doc markups The hrtimer_get_remaining() markup is documenting, instead, __hrtimer_get_remaining(), as it is placed at the C file. In order to properly document it, a kernel-doc markup is needed together with the function prototype. So, add a new one, while preserving the existing one, just fixing the function name. The hrtimer_is_queued prototype has a typo: it is using '=' instead of '-' to split: identifier - description as required by kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/9dc87808c2fd07b7e050bafcd033c5ef05808fea.1605521731.git.mchehab+huawei@kernel.org --- include/linux/hrtimer.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 107cedd7019a..bb5e7b0a4274 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -447,6 +447,10 @@ static inline void hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); +/** + * hrtimer_get_remaining - get remaining time for the timer + * @timer: the timer to read + */ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { return __hrtimer_get_remaining(timer, false); @@ -458,7 +462,7 @@ extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); /** - * hrtimer_is_queued = check, whether the timer is on one of the queues + * hrtimer_is_queued - check, whether the timer is on one of the queues * @timer: Timer to check * * Returns: True if the timer is queued, false otherwise -- cgit v1.2.3 From e00adcadf3af7a8335026d71ab9f0e0a922191ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:11 +0100 Subject: block: add a new set_read_only method Add a new method to allow for driver-specific processing when setting or clearing the block device read-only state. This allows to replace the cumbersome and error-prone override of the whole ioctl implementation. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..5c1ba8a8d2bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1850,6 +1850,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, -- cgit v1.2.3 From 98f49b63e84d4ee1a5c327d0b5f4e8699f6c70fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:17 +0100 Subject: block: remove set_device_ro Fold set_device_ro into its only remaining caller. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03da3f603d30..52eb592c874a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -304,7 +304,6 @@ extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) -- cgit v1.2.3 From a7cb3d2f09c8405aed59d97a7d02cebea43cd3c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:18 +0100 Subject: block: remove __blkdev_driver_ioctl Just open code it in the few callers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c1ba8a8d2bc..05b346a68c2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1867,8 +1867,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, - unsigned long); extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -- cgit v1.2.3 From a160c6159d4a0cf82f28bc1658a958e278ec3688 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:28 +0100 Subject: block: add an optional probe callback to major_names Add a callback to the major_names array that allows a driver to override how to probe for dev_t that doesn't currently have a gendisk registered. This will help separating the lookup of the gendisk by dev_t vs probe action for a not currently registered dev_t. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 52eb592c874a..811d0f83c4cf 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -367,7 +367,10 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -int register_blkdev(unsigned int major, const char *name); +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); -- cgit v1.2.3 From d18e8b1bf9e2ee814a7f886a156bf762d52e178b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:29 +0100 Subject: ide: remove ide_{,un}register_region There is no need to ever register the fake gendisk used for ide-tape. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/ide.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 62653769509f..2c300689a51a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1493,9 +1493,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_register_region(struct gendisk *); -void ide_unregister_region(struct gendisk *); - void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); -- cgit v1.2.3 From e418de3abcda8b102f737919e830024d1455938f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:41 +0100 Subject: block: switch gendisk lookup to a simple xarray Now that bdev_map is only used for finding gendisks, we can use a simple xarray instead of the regions tracking structure for it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- include/linux/genhd.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 811d0f83c4cf..3cbc2781ef34 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -339,15 +339,8 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void put_disk_and_module(struct gendisk *disk); -extern void blk_register_region(dev_t devt, unsigned long range, - struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), - void *data); -extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk_node(minors, node_id) \ ({ \ -- cgit v1.2.3 From 7a089ec7d77fe7d50f6bb7b178fa25eec9fd822b Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 12 Nov 2020 07:04:03 -0500 Subject: console: Delete unused con_font_copy() callback implementations Recently in commit 3c4e0dff2095 ("vt: Disable KD_FONT_OP_COPY") we disabled the KD_FONT_OP_COPY ioctl() option. Delete all the con_font_copy() callbacks, since we no longer use them. Mark KD_FONT_OP_COPY as "obsolete" in include/uapi/linux/kd.h, just like what we have done for PPPIOCDETACH in commit af8d3c7c001a ("ppp: remove the PPPIOCDETACH ioctl"). Signed-off-by: Peilin Ye Reviewed-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/c8d28007edf50de4387e1532eb3eb736db716f73.1605169912.git.yepeilin.cs@gmail.com --- include/linux/console.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 4b1e26c4cb42..20874db50bc8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -62,7 +62,6 @@ struct consw { int (*con_font_get)(struct vc_data *vc, struct console_font *font); int (*con_font_default)(struct vc_data *vc, struct console_font *font, char *name); - int (*con_font_copy)(struct vc_data *vc, int con); int (*con_resize)(struct vc_data *vc, unsigned int width, unsigned int height, unsigned int user); void (*con_set_palette)(struct vc_data *vc, -- cgit v1.2.3 From 4ee573086bd88ff3060dda07873bf755d332e9ba Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 12 Nov 2020 07:13:34 -0500 Subject: Fonts: Add charcount field to font_desc Subsystems are hard-coding the number of characters of our built-in fonts as 256. Include that information in our kernel font descriptor, `struct font_desc`. Signed-off-by: Peilin Ye Reviewed-by: Daniel Vetter Reviewed-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/65952296d1d9486093bd955d1536f7dcd11112c6.1605169912.git.yepeilin.cs@gmail.com --- include/linux/font.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/font.h b/include/linux/font.h index 4f50d736ea72..abf1442ce719 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -17,6 +17,7 @@ struct font_desc { int idx; const char *name; unsigned int width, height; + unsigned int charcount; const void *data; int pref; }; -- cgit v1.2.3 From 449f4ec9892ebc2f37a7eae6d97db2cf7c65e09a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Nov 2020 15:56:56 +0100 Subject: block: remove the update_bdev parameter to set_capacity_revalidate_and_notify The update_bdev argument is always set to true, so remove it. Also rename the function to the slighly less verbose set_capacity_and_notify, as propagating the disk size to the block device isn't really revalidation. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3cbc2781ef34..46553d6d6025 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -314,8 +314,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev); +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -- cgit v1.2.3 From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Nov 2020 17:52:58 +0100 Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed Stefan Agner reported a bug when using zsram on 32-bit Arm machines with RAM above the 4GB address boundary: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = a27bd01c [00000000] *pgd=236a0003, *pmd=1ffa64003 Internal error: Oops: 207 [#1] SMP ARM Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1 Hardware name: BCM2711 PC is at zs_map_object+0x94/0x338 LR is at zram_bvec_rw.constprop.0+0x330/0xa64 pc : [] lr : [] psr: 60000013 sp : e376bbe0 ip : 00000000 fp : c1e2921c r10: 00000002 r9 : c1dda730 r8 : 00000000 r7 : e8ff7a00 r6 : 00000000 r5 : 02f9ffa0 r4 : e3710000 r3 : 000fdffe r2 : c1e0ce80 r1 : ebf979a0 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5383d Table: 235c2a80 DAC: fffffffd Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6) Stack: (0xe376bbe0 to 0xe376c000) As it turns out, zsram needs to know the maximum memory size, which is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture. The same problem will be hit on all 32-bit architectures that have a physical address space larger than 4GB and happen to not enable sparsemem and include asm/sparsemem.h from asm/pgtable.h. After the initial discussion, I suggested just always defining MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is set, or provoking a build error otherwise. This addresses all configurations that can currently have this runtime bug, but leaves all other configurations unchanged. I looked up the possible number of bits in source code and datasheets, here is what I found: - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never support more than 32 bits, even though supersections in theory allow up to 40 bits as well. - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5 XPA supports up to 60 bits in theory, but 40 bits are more than anyone will ever ship - On PowerPC, there are three different implementations of 36 bit addressing, but 32-bit is used without CONFIG_PTE_64BIT - On RISC-V, the normal page table format can support 34 bit addressing. There is no highmem support on RISC-V, so anything above 2GB is unused, but it might be useful to eventually support CONFIG_ZRAM for high pages. Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library") Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS") Acked-by: Thomas Bogendoerfer Reviewed-by: Stefan Agner Tested-by: Stefan Agner Acked-by: Mike Rapoport Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/ Signed-off-by: Arnd Bergmann --- include/linux/pgtable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..e237004d498d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ +#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) +#ifdef CONFIG_PHYS_ADDR_T_64BIT +/* + * ZSMALLOC needs to know the highest PFN on 32-bit architectures + * with physical address space extension, but falls back to + * BITS_PER_LONG otherwise. + */ +#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition +#else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#endif +#endif + #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit v1.2.3 From 557acb3d2cd9c82de19f944f6cc967a347735385 Mon Sep 17 00:00:00 2001 From: Amjad Ouled-Ameur Date: Fri, 13 Nov 2020 00:00:43 +0100 Subject: reset: make shared pulsed reset controls re-triggerable The current reset framework API does not allow to release what is done by reset_control_reset(), IOW decrement triggered_count. Add the new reset_control_rearm() call to do so. When reset_control_reset() has been called once, the counter triggered_count, in the reset framework, is incremented i.e the resource under the reset is in-use and the reset should not be done again. reset_control_rearm() would be the way to state that the resource is no longer used and, that from the caller's perspective, the reset can be fired again if necessary. Signed-off-by: Amjad Ouled-Ameur Reported-by: Jerome Brunet Signed-off-by: Philipp Zabel --- include/linux/reset.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 05aa9f440f48..439fec7112a9 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -13,6 +13,7 @@ struct reset_control; #ifdef CONFIG_RESET_CONTROLLER int reset_control_reset(struct reset_control *rstc); +int reset_control_rearm(struct reset_control *rstc); int reset_control_assert(struct reset_control *rstc); int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); -- cgit v1.2.3 From 872f690341948b502c93318f806d821c56772c42 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Sun, 15 Nov 2020 18:08:06 +0100 Subject: treewide: rename nla_strlcpy to nla_strscpy. Calls to nla_strlcpy are now replaced by calls to nla_strscpy which is the new name of this function. Signed-off-by: Francis Laniel Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski --- include/linux/genl_magic_struct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index eeae59d3ceb7..35d21fddaf2d 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -89,7 +89,7 @@ static inline int nla_put_u64_0pad(struct sk_buff *skb, int attrtype, u64 value) nla_get_u64, nla_put_u64_0pad, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, nla_put, false) + nla_strscpy, nla_put, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ nla_memcpy, nla_put, false) -- cgit v1.2.3 From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 10 Nov 2020 17:29:32 +0800 Subject: PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter In many case, we need to check return value of pm_runtime_get_sync, but it brings a trouble to the usage counter processing. Many callers forget to decrease the usage counter when it failed, which could resulted in reference leak. It has been discussed a lot[0][1]. So we add a function to deal with the usage counter for better coding. [0]https://lkml.org/lkml/2020/6/14/88 [1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139 Signed-off-by: Zhang Qilong Acked-by: Rafael J. Wysocki Signed-off-by: Jakub Kicinski --- include/linux/pm_runtime.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4b708f4e8eed..b492ae00cc90 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +/** + * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. + * @dev: Target device. + * + * Resume @dev synchronously and if that is successful, increment its runtime + * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been + * incremented or a negative error code otherwise. + */ +static inline int pm_runtime_resume_and_get(struct device *dev) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. -- cgit v1.2.3 From 3136b93c3fb2b7c19e853e049203ff8f2b9dd2cd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:41:58 -0500 Subject: entry: Expose helpers to migrate TIF to SYSCALL_WORK flags With the goal to split the syscall work related flags into a separate field that is architecture independent, expose transitional helpers that resolve to either the TIF flags or to the corresponding SYSCALL_WORK flags. This will allow architectures to migrate only when they port to the generic syscall entry code. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-3-krisman@collabora.com --- include/linux/thread_info.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e93e249a4e9b..0e9fb15d6b42 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -97,6 +97,38 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) +#ifdef CONFIG_GENERIC_ENTRY +#define set_syscall_work(fl) \ + set_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) +#define test_syscall_work(fl) \ + test_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) +#define clear_syscall_work(fl) \ + clear_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) + +#define set_task_syscall_work(t, fl) \ + set_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) +#define test_task_syscall_work(t, fl) \ + test_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) +#define clear_task_syscall_work(t, fl) \ + clear_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) + +#else /* CONFIG_GENERIC_ENTRY */ + +#define set_syscall_work(fl) \ + set_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) +#define test_syscall_work(fl) \ + test_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) +#define clear_syscall_work(fl) \ + clear_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + +#define set_task_syscall_work(t, fl) \ + set_ti_thread_flag(task_thread_info(t), TIF_##fl) +#define test_task_syscall_work(t, fl) \ + test_ti_thread_flag(task_thread_info(t), TIF_##fl) +#define clear_task_syscall_work(t, fl) \ + clear_ti_thread_flag(task_thread_info(t), TIF_##fl) +#endif /* !CONFIG_GENERIC_ENTRY */ + #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES -- cgit v1.2.3 From b86678cf0f1d76062aa964c5f0c6c89fe5a6dcfd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:41:59 -0500 Subject: entry: Wire up syscall_work in common entry code Prepare the common entry code to use the SYSCALL_WORK flags. They will be defined in subsequent patches for each type of syscall work. SYSCALL_WORK_ENTRY/EXIT are defined for the transition, as they will replace the TIF_ equivalent defines. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-4-krisman@collabora.com --- include/linux/entry-common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index aab549026ab8..3fe8f868f15e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -64,6 +64,9 @@ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) +#define SYSCALL_WORK_ENTER (0) +#define SYSCALL_WORK_EXIT (0) + /* * TIF flags handled in exit_to_user_mode_loop() */ -- cgit v1.2.3 From 23d67a54857a768acdb0804cdd6037c324a50ecd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:00 -0500 Subject: seccomp: Migrate to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SECCOMP, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-5-krisman@collabora.com --- include/linux/entry-common.h | 8 ++------ include/linux/seccomp.h | 2 +- include/linux/thread_info.h | 6 ++++++ 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 3fe8f868f15e..fa3cdb102dbf 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -21,10 +21,6 @@ # define _TIF_SYSCALL_TRACEPOINT (0) #endif -#ifndef _TIF_SECCOMP -# define _TIF_SECCOMP (0) -#endif - #ifndef _TIF_SYSCALL_AUDIT # define _TIF_SYSCALL_AUDIT (0) #endif @@ -49,7 +45,7 @@ #endif #define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) @@ -64,7 +60,7 @@ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) -#define SYSCALL_WORK_ENTER (0) +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP) #define SYSCALL_WORK_EXIT (0) /* diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 02aef2844c38..47763f3999f7 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -42,7 +42,7 @@ struct seccomp { extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { - if (unlikely(test_thread_flag(TIF_SECCOMP))) + if (unlikely(test_syscall_work(SECCOMP))) return __secure_computing(NULL); return 0; } diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 0e9fb15d6b42..a308ba4ef07b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -35,6 +35,12 @@ enum { GOOD_STACK, }; +enum syscall_work_bit { + SYSCALL_WORK_BIT_SECCOMP, +}; + +#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) + #include #ifdef __KERNEL__ -- cgit v1.2.3 From 524666cb5de7c38a1925e7401a6e59d68682dd8c Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:01 -0500 Subject: tracepoints: Migrate to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_TRACEPOINT, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-6-krisman@collabora.com --- include/linux/entry-common.h | 13 +++++-------- include/linux/thread_info.h | 2 ++ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fa3cdb102dbf..2a01eee2dbba 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -17,10 +17,6 @@ # define _TIF_SYSCALL_EMU (0) #endif -#ifndef _TIF_SYSCALL_TRACEPOINT -# define _TIF_SYSCALL_TRACEPOINT (0) -#endif - #ifndef _TIF_SYSCALL_AUDIT # define _TIF_SYSCALL_AUDIT (0) #endif @@ -46,7 +42,7 @@ #define SYSCALL_ENTER_WORK \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ + _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) /* @@ -58,10 +54,11 @@ #define SYSCALL_EXIT_WORK \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) + ARCH_SYSCALL_EXIT_WORK) -#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP) -#define SYSCALL_WORK_EXIT (0) +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ + SYSCALL_WORK_SYSCALL_TRACEPOINT) +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT) /* * TIF flags handled in exit_to_user_mode_loop() diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index a308ba4ef07b..c232043c12d3 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -37,9 +37,11 @@ enum { enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, + SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) +#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #include -- cgit v1.2.3 From 64c19ba29b66e98af9306b4a7525fb22c895d252 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:02 -0500 Subject: ptrace: Migrate to use SYSCALL_TRACE flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_TRACE, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-7-krisman@collabora.com --- include/linux/entry-common.h | 10 ++++++---- include/linux/thread_info.h | 2 ++ include/linux/tracehook.h | 17 +++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2a01eee2dbba..ae426ab9c372 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -41,7 +41,7 @@ #endif #define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + (_TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) @@ -53,12 +53,14 @@ #endif #define SYSCALL_EXIT_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + (_TIF_SYSCALL_AUDIT | \ ARCH_SYSCALL_EXIT_WORK) #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ - SYSCALL_WORK_SYSCALL_TRACEPOINT) -#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT) + SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE) +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE) /* * TIF flags handled in exit_to_user_mode_loop() diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index c232043c12d3..761a4590d554 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -38,10 +38,12 @@ enum { enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, + SYSCALL_WORK_BIT_SYSCALL_TRACE, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) +#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #include diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index f7d82e4fafd6..3f20368afe9e 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -83,11 +83,12 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, * tracehook_report_syscall_entry - task is about to attempt a system call * @regs: user register state of current task * - * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set, - * when the current task has just entered the kernel for a system call. - * Full user register state is available here. Changing the values - * in @regs can affect the system call number and arguments to be tried. - * It is safe to block here, preventing the system call from beginning. + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or + * %TIF_SYSCALL_EMU have been set, when the current task has just + * entered the kernel for a system call. Full user register state is + * available here. Changing the values in @regs can affect the system + * call number and arguments to be tried. It is safe to block here, + * preventing the system call from beginning. * * Returns zero normally, or nonzero if the calling arch code should abort * the system call. That must prevent normal entry so no system call is @@ -109,15 +110,15 @@ static inline __must_check int tracehook_report_syscall_entry( * @regs: user register state of current task * @step: nonzero if simulating single-step or block-step * - * This will be called if %TIF_SYSCALL_TRACE has been set, when the - * current task has just finished an attempted system call. Full + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when + * the current task has just finished an attempted system call. Full * user register state is available here. It is safe to block here, * preventing signals from being processed. * * If @step is nonzero, this report is also in lieu of the normal * trap that would follow the system call instruction because * user_enable_block_step() or user_enable_single_step() was used. - * In this case, %TIF_SYSCALL_TRACE might not be set. + * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. * * Called without locks, just before checking for pending signals. */ -- cgit v1.2.3 From 64eb35f701f04b30706e21d1b02636b5d31a37d2 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:03 -0500 Subject: ptrace: Migrate TIF_SYSCALL_EMU to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_EMU, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-8-krisman@collabora.com --- include/linux/entry-common.h | 8 ++------ include/linux/thread_info.h | 2 ++ include/linux/tracehook.h | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index ae426ab9c372..b30f82bed92b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -13,10 +13,6 @@ * Define dummy _TIF work flags if not defined by the architecture or for * disabled functionality. */ -#ifndef _TIF_SYSCALL_EMU -# define _TIF_SYSCALL_EMU (0) -#endif - #ifndef _TIF_SYSCALL_AUDIT # define _TIF_SYSCALL_AUDIT (0) #endif @@ -42,7 +38,6 @@ #define SYSCALL_ENTER_WORK \ (_TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) /* @@ -58,7 +53,8 @@ #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ SYSCALL_WORK_SYSCALL_TRACEPOINT | \ - SYSCALL_WORK_SYSCALL_TRACE) + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_EMU) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 761a4590d554..85b8a4216168 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -39,11 +39,13 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, SYSCALL_WORK_BIT_SYSCALL_TRACE, + SYSCALL_WORK_BIT_SYSCALL_EMU, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) +#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #include diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3f20368afe9e..54b925224a13 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -84,7 +84,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, * @regs: user register state of current task * * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or - * %TIF_SYSCALL_EMU have been set, when the current task has just + * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just * entered the kernel for a system call. Full user register state is * available here. Changing the values in @regs can affect the system * call number and arguments to be tried. It is safe to block here, -- cgit v1.2.3 From 785dc4eb7fd74e3b7f4eac468457b633117e1aea Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:04 -0500 Subject: audit: Migrate to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_AUDIT, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-9-krisman@collabora.com --- include/linux/entry-common.h | 18 ++++++------------ include/linux/thread_info.h | 2 ++ 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b30f82bed92b..d7b96f42817f 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -13,10 +13,6 @@ * Define dummy _TIF work flags if not defined by the architecture or for * disabled functionality. */ -#ifndef _TIF_SYSCALL_AUDIT -# define _TIF_SYSCALL_AUDIT (0) -#endif - #ifndef _TIF_PATCH_PENDING # define _TIF_PATCH_PENDING (0) #endif @@ -36,9 +32,7 @@ # define ARCH_SYSCALL_ENTER_WORK (0) #endif -#define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_AUDIT | \ - ARCH_SYSCALL_ENTER_WORK) +#define SYSCALL_ENTER_WORK ARCH_SYSCALL_ENTER_WORK /* * TIF flags handled in syscall_exit_to_user_mode() @@ -47,16 +41,16 @@ # define ARCH_SYSCALL_EXIT_WORK (0) #endif -#define SYSCALL_EXIT_WORK \ - (_TIF_SYSCALL_AUDIT | \ - ARCH_SYSCALL_EXIT_WORK) +#define SYSCALL_EXIT_WORK ARCH_SYSCALL_EXIT_WORK #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ - SYSCALL_WORK_SYSCALL_EMU) + SYSCALL_WORK_SYSCALL_EMU | \ + SYSCALL_WORK_SYSCALL_AUDIT) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ - SYSCALL_WORK_SYSCALL_TRACE) + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_AUDIT) /* * TIF flags handled in exit_to_user_mode_loop() diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 85b8a4216168..317363212ae9 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -40,12 +40,14 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, SYSCALL_WORK_BIT_SYSCALL_TRACE, SYSCALL_WORK_BIT_SYSCALL_EMU, + SYSCALL_WORK_BIT_SYSCALL_AUDIT, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) +#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) #include -- cgit v1.2.3 From 2991552447707d791d9d81a5dc161f9e9e90b163 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:05 -0500 Subject: entry: Drop usage of TIF flags in the generic syscall code Now that the flags migration in the common syscall entry code is complete and the code relies exclusively on thread_info::syscall_work, clean up the accesses to TI flags in that path. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-10-krisman@collabora.com --- include/linux/entry-common.h | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index d7b96f42817f..49b26b216e4e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -26,31 +26,29 @@ #endif /* - * TIF flags handled in syscall_enter_from_user_mode() + * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() */ -#ifndef ARCH_SYSCALL_ENTER_WORK -# define ARCH_SYSCALL_ENTER_WORK (0) +#ifndef ARCH_SYSCALL_WORK_ENTER +# define ARCH_SYSCALL_WORK_ENTER (0) #endif -#define SYSCALL_ENTER_WORK ARCH_SYSCALL_ENTER_WORK - /* - * TIF flags handled in syscall_exit_to_user_mode() + * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() */ -#ifndef ARCH_SYSCALL_EXIT_WORK -# define ARCH_SYSCALL_EXIT_WORK (0) +#ifndef ARCH_SYSCALL_WORK_EXIT +# define ARCH_SYSCALL_WORK_EXIT (0) #endif -#define SYSCALL_EXIT_WORK ARCH_SYSCALL_EXIT_WORK - #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_EMU | \ - SYSCALL_WORK_SYSCALL_AUDIT) + SYSCALL_WORK_SYSCALL_AUDIT | \ + ARCH_SYSCALL_WORK_ENTER) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ - SYSCALL_WORK_SYSCALL_AUDIT) + SYSCALL_WORK_SYSCALL_AUDIT | \ + ARCH_SYSCALL_WORK_EXIT) /* * TIF flags handled in exit_to_user_mode_loop() @@ -136,8 +134,8 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * * It handles the following work items: * - * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), - * __secure_computing(), trace_sys_enter() + * 1) syscall_work flag dependent invocations of + * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); -- cgit v1.2.3 From e8b7db38449ac5b950a3f00519171c4be3e226ff Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:04:00 +0100 Subject: Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening Currently, VMbus drivers use pointers into guest memory as request IDs for interactions with Hyper-V. To be more robust in the face of errors or malicious behavior from a compromised Hyper-V, avoid exposing guest memory addresses to Hyper-V. Also avoid Hyper-V giving back a bad request ID that is then treated as the address of a guest data structure with no validation. Instead, encapsulate these memory addresses and provide small integers as request IDs. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Reviewed-by: Wei Liu Link: https://lore.kernel.org/r/20201109100402.8946-2-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 1ce131f29f3b..5b6d5c4e3711 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -764,6 +764,22 @@ enum vmbus_device_type { HV_UNKNOWN, }; +/* + * Provides request ids for VMBus. Encapsulates guest memory + * addresses and stores the next available slot in req_arr + * to generate new ids in constant time. + */ +struct vmbus_requestor { + u64 *req_arr; + unsigned long *req_bitmap; /* is a given slot available? */ + u32 size; + u64 next_request_id; + spinlock_t req_lock; /* provides atomicity */ +}; + +#define VMBUS_NO_RQSTOR U64_MAX +#define VMBUS_RQST_ERROR (U64_MAX - 1) + struct vmbus_device { u16 dev_type; guid_t guid; @@ -988,8 +1004,14 @@ struct vmbus_channel { u32 fuzz_testing_interrupt_delay; u32 fuzz_testing_message_delay; + /* request/transaction ids for VMBus */ + struct vmbus_requestor requestor; + u32 rqstor_size; }; +u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); +u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id); + static inline bool is_hvsock_channel(const struct vmbus_channel *c) { return !!(c->offermsg.offer.chn_flags & -- cgit v1.2.3 From 4d18fcc95f50950a99bd940d4e61a983f91d267a Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:04:02 +0100 Subject: hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus hardening Currently, pointers to guest memory are passed to Hyper-V as transaction IDs in netvsc. In the face of errors or malicious behavior in Hyper-V, netvsc should not expose or trust the transaction IDs returned by Hyper-V to be valid guest memory addresses. Instead, use small integers generated by vmbus_requestor as requests (transaction) IDs. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Acked-by: Jakub Kicinski Reviewed-by: Wei Liu Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20201109100402.8946-4-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5b6d5c4e3711..5ddb479c4d4c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -779,6 +779,7 @@ struct vmbus_requestor { #define VMBUS_NO_RQSTOR U64_MAX #define VMBUS_RQST_ERROR (U64_MAX - 1) +#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2) struct vmbus_device { u16 dev_type; -- cgit v1.2.3 From f97bb5272d9e95d400d6c8643ebb146b3e3e7842 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 09:08:41 +0100 Subject: sched: Fix data-race in wakeup Mel reported that on some ARM64 platforms loadavg goes bananas and Will tracked it down to the following race: CPU0 CPU1 schedule() prev->sched_contributes_to_load = X; deactivate_task(prev); try_to_wake_up() if (p->on_rq &&) // false if (smp_load_acquire(&p->on_cpu) && // true ttwu_queue_wakelist()) p->sched_remote_wakeup = Y; smp_store_release(prev->on_cpu, 0); where both p->sched_contributes_to_load and p->sched_remote_wakeup are in the same word, and thus the stores X and Y race (and can clobber one another's data). Whereas prior to commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the p->on_cpu handoff serialized access to p->sched_remote_wakeup (just as it still does with p->sched_contributes_to_load) that commit broke that by calling ttwu_queue_wakelist() with p->on_cpu != 0. However, due to p->XXX = X ttwu() schedule() if (p->on_rq && ...) // false smp_mb__after_spinlock() if (smp_load_acquire(&p->on_cpu) && deactivate_task() ttwu_queue_wakelist()) p->on_rq = 0; p->sched_remote_wakeup = Y; We can be sure any 'current' store is complete and 'current' is guaranteed asleep. Therefore we can move p->sched_remote_wakeup into the current flags word. Note: while the observed failure was loadavg accounting gone wrong due to ttwu() cobbering p->sched_contributes_to_load, the reverse problem is also possible where schedule() clobbers p->sched_remote_wakeup, this could result in enqueue_entity() wrecking ->vruntime and causing scheduling artifacts. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Mel Gorman Debugged-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117083016.GK3121392@hirez.programming.kicks-ass.net --- include/linux/sched.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d383cf09e78f..0e91b451d2a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -769,7 +769,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; #ifdef CONFIG_PSI unsigned sched_psi_wake_requeue:1; #endif @@ -779,6 +778,21 @@ struct task_struct { /* Unserialized, strictly 'current' */ + /* + * This field must not be in the scheduler word above due to wakelist + * queueing no longer being serialized by p->on_cpu. However: + * + * p->XXX = X; ttwu() + * schedule() if (p->on_rq && ..) // false + * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true + * deactivate_task() ttwu_queue_wakelist()) + * p->on_rq = 0; p->sched_remote_wakeup = Y; + * + * guarantees all stores of 'current' are visible before + * ->sched_remote_wakeup gets used, so it can be in this word. + */ + unsigned sched_remote_wakeup:1; + /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; -- cgit v1.2.3 From 2279f540ea7d05f22d2f0c4224319330228586bc Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 17 Nov 2020 07:14:32 +0100 Subject: sched/deadline: Fix priority inheritance with multiple scheduling classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glenn reported that "an application [he developed produces] a BUG in deadline.c when a SCHED_DEADLINE task contends with CFS tasks on nested PTHREAD_PRIO_INHERIT mutexes. I believe the bug is triggered when a CFS task that was boosted by a SCHED_DEADLINE task boosts another CFS task (nested priority inheritance). ------------[ cut here ]------------ kernel BUG at kernel/sched/deadline.c:1462! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 12 PID: 19171 Comm: dl_boost_bug Tainted: ... Hardware name: ... RIP: 0010:enqueue_task_dl+0x335/0x910 Code: ... RSP: 0018:ffffc9000c2bbc68 EFLAGS: 00010002 RAX: 0000000000000009 RBX: ffff888c0af94c00 RCX: ffffffff81e12500 RDX: 000000000000002e RSI: ffff888c0af94c00 RDI: ffff888c10b22600 RBP: ffffc9000c2bbd08 R08: 0000000000000009 R09: 0000000000000078 R10: ffffffff81e12440 R11: ffffffff81e1236c R12: ffff888bc8932600 R13: ffff888c0af94eb8 R14: ffff888c10b22600 R15: ffff888bc8932600 FS: 00007fa58ac55700(0000) GS:ffff888c10b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa58b523230 CR3: 0000000bf44ab003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? intel_pstate_update_util_hwp+0x13/0x170 rt_mutex_setprio+0x1cc/0x4b0 task_blocks_on_rt_mutex+0x225/0x260 rt_spin_lock_slowlock_locked+0xab/0x2d0 rt_spin_lock_slowlock+0x50/0x80 hrtimer_grab_expiry_lock+0x20/0x30 hrtimer_cancel+0x13/0x30 do_nanosleep+0xa0/0x150 hrtimer_nanosleep+0xe1/0x230 ? __hrtimer_init_sleeper+0x60/0x60 __x64_sys_nanosleep+0x8d/0xa0 do_syscall_64+0x4a/0x100 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa58b52330d ... ---[ end trace 0000000000000002 ]— He also provided a simple reproducer creating the situation below: So the execution order of locking steps are the following (N1 and N2 are non-deadline tasks. D1 is a deadline task. M1 and M2 are mutexes that are enabled * with priority inheritance.) Time moves forward as this timeline goes down: N1 N2 D1 | | | | | | Lock(M1) | | | | | | Lock(M2) | | | | | | Lock(M2) | | | | Lock(M1) | | (!!bug triggered!) | Daniel reported a similar situation as well, by just letting ksoftirqd run with DEADLINE (and eventually block on a mutex). Problem is that boosted entities (Priority Inheritance) use static DEADLINE parameters of the top priority waiter. However, there might be cases where top waiter could be a non-DEADLINE entity that is currently boosted by a DEADLINE entity from a different lock chain (i.e., nested priority chains involving entities of non-DEADLINE classes). In this case, top waiter static DEADLINE parameters could be null (initialized to 0 at fork()) and replenish_dl_entity() would hit a BUG(). Fix this by keeping track of the original donor and using its parameters when a task is boosted. Reported-by: Glenn Elliott Reported-by: Daniel Bristot de Oliveira Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Tested-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201117061432.517340-1-juri.lelli@redhat.com --- include/linux/sched.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0e91b451d2a2..095fdec07b38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -551,7 +551,6 @@ struct sched_dl_entity { * overruns. */ unsigned int dl_throttled : 1; - unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; @@ -570,6 +569,15 @@ struct sched_dl_entity { * time. */ struct hrtimer inactive_timer; + +#ifdef CONFIG_RT_MUTEXES + /* + * Priority Inheritance. When a DEADLINE scheduling entity is boosted + * pi_se points to the donor, otherwise points to the dl_se it belongs + * to (the original one/itself). + */ + struct sched_dl_entity *pi_se; +#endif }; #ifdef CONFIG_UCLAMP_TASK -- cgit v1.2.3 From 95bb7c42ac8a94ce3d0eb059ad64430390351ccb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 13 Nov 2020 00:01:21 +0200 Subject: mm: Add 'mprotect' hook to struct vm_operations_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Background ========== 1. SGX enclave pages are populated with data by copying from normal memory via ioctl() (SGX_IOC_ENCLAVE_ADD_PAGES), which will be added later in this series. 2. It is desirable to be able to restrict those normal memory data sources. For instance, to ensure that the source data is executable before copying data to an executable enclave page. 3. Enclave page permissions are dynamic (just like normal permissions) and can be adjusted at runtime with mprotect(). This creates a problem because the original data source may have long since vanished at the time when enclave page permissions are established (mmap() or mprotect()). The solution (elsewhere in this series) is to force enclave creators to declare their paging permission *intent* up front to the ioctl(). This intent can be immediately compared to the source data’s mapping and rejected if necessary. The “intent” is also stashed off for later comparison with enclave PTEs. This ensures that any future mmap()/mprotect() operations performed by the enclave creator or done on behalf of the enclave can be compared with the earlier declared permissions. Problem ======= There is an existing mmap() hook which allows SGX to perform this permission comparison at mmap() time. However, there is no corresponding ->mprotect() hook. Solution ======== Add a vm_ops->mprotect() hook so that mprotect() operations which are inconsistent with any page's stashed intent can be rejected by the driver. Signed-off-by: Sean Christopherson Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Acked-by: Dave Hansen Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: linux-mm@kvack.org Link: https://lkml.kernel.org/r/20201112220135.165028-11-jarkko@kernel.org --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..1813fa86b981 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -559,6 +559,13 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); + /* + * Called by mprotect() to make driver-specific permission + * checks before mprotect() is finalised. The VMA must not + * be modified. Returns 0 if eprotect() can proceed. + */ + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); -- cgit v1.2.3 From 5bdba520c1b318578caffd325515b35d187f8a0e Mon Sep 17 00:00:00 2001 From: Faiyaz Mohammed Date: Mon, 16 Nov 2020 17:05:37 +0530 Subject: mm: memblock: drop __init from memblock functions to make it inline __init is used with inline due to which memblock wraper functions are not getting inline. for example: [ 0.000000] memblock_alloc_try_nid: 1490 bytes align=0x40 nid=-1 from=0x0000000000000000 max_addr=0x0000000000000000 memblock_alloc+0x20/0x2c [ 0.000000] memblock_reserve: [0x000000023f09a3c0-0x000000023f09a991] memblock_alloc_range_nid+0xc0/0x188 Dropping __init from memblock wrapper functions to make it inline and it increase the debugability. After: [ 0.000000] memblock_alloc_try_nid: 1490 bytes align=0x40 nid=-1 from=0x0000000000000000 max_addr=0x0000000000000000 start_kernel+0xa4/0x568 [ 0.000000] memblock_reserve: [0x000000023f09a3c0-0x000000023f09a991] memblock_alloc_range_nid+0xc0/0x188 Signed-off-by: Faiyaz Mohammed Signed-off-by: Mike Rapoport --- include/linux/memblock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ef131255cedc..b93c44b9121e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -404,13 +404,13 @@ void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -static inline void * __init memblock_alloc(phys_addr_t size, phys_addr_t align) +static __always_inline void *memblock_alloc(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_raw(phys_addr_t size, +static inline void *memblock_alloc_raw(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid_raw(size, align, MEMBLOCK_LOW_LIMIT, @@ -418,7 +418,7 @@ static inline void * __init memblock_alloc_raw(phys_addr_t size, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_from(phys_addr_t size, +static inline void *memblock_alloc_from(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { @@ -426,33 +426,33 @@ static inline void * __init memblock_alloc_from(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_low(phys_addr_t size, +static inline void *memblock_alloc_low(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_node(phys_addr_t size, +static inline void *memblock_alloc_node(phys_addr_t size, phys_addr_t align, int nid) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } -static inline void __init memblock_free_early(phys_addr_t base, +static inline void memblock_free_early(phys_addr_t base, phys_addr_t size) { memblock_free(base, size); } -static inline void __init memblock_free_early_nid(phys_addr_t base, +static inline void memblock_free_early_nid(phys_addr_t base, phys_addr_t size, int nid) { memblock_free(base, size); } -static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) { __memblock_free_late(base, size); } @@ -460,7 +460,7 @@ static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline void __init memblock_set_bottom_up(bool enable) +static inline void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } -- cgit v1.2.3 From 1f90f6a835514cb69bfede0b2752b0cb7a351bbd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Nov 2020 22:45:05 +0200 Subject: resource: Group resource_overlaps() with other inline helpers For better maintenance group resource_overlaps() with other inline helpers. While at it, drop extra parentheses. Signed-off-by: Andy Shevchenko Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5135d4b86cd6..df4581107536 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,6 +229,11 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) return r1->start <= r2->start && r1->end >= r2->end; } +/* True if any part of r1 overlaps r2 */ +static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +{ + return r1->start <= r2->end && r1->end >= r2->start; +} /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) @@ -296,12 +301,6 @@ extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); -/* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) -{ - return (r1->start <= r2->end && r1->end >= r2->start); -} - struct resource *devm_request_free_mem_region(struct device *dev, struct resource *base, unsigned long size); struct resource *request_free_mem_region(struct resource *base, -- cgit v1.2.3 From 5562f35d7feabfd68cd58a1ee28b451f90e82417 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Nov 2020 22:45:06 +0200 Subject: resource: Introduce resource_union() for overlapping resources Some already present users may utilize resource_union() helper. Provide it for them and for wider use in the future. Signed-off-by: Andy Shevchenko Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index df4581107536..40320eb5bc0e 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -10,9 +10,10 @@ #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ +#include #include +#include #include -#include /* * Resources are tree-like, allowing * nesting etc.. @@ -235,6 +236,16 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) return r1->start <= r2->end && r1->end >= r2->start; } +static inline bool +resource_union(struct resource *r1, struct resource *r2, struct resource *r) +{ + if (!resource_overlaps(r1, r2)) + return false; + r->start = min(r1->start, r2->start); + r->end = max(r1->end, r2->end); + return true; +} + /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) -- cgit v1.2.3 From f65674df1b23cdcb6f656a14f659ffea83e7acaa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Nov 2020 22:45:07 +0200 Subject: resource: Introduce resource_intersection() for overlapping resources There will be at least one user that can utilize new helper. Provide the helper for future user and for wider use. Signed-off-by: Andy Shevchenko Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 40320eb5bc0e..ece1a8db309c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -236,6 +236,16 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) return r1->start <= r2->end && r1->end >= r2->start; } +static inline bool +resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +{ + if (!resource_overlaps(r1, r2)) + return false; + r->start = max(r1->start, r2->start); + r->end = min(r1->end, r2->end); + return true; +} + static inline bool resource_union(struct resource *r1, struct resource *r2, struct resource *r) { -- cgit v1.2.3 From 97f53a08cba128a724ebbbf34778d3553d559816 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Nov 2020 13:21:08 -0800 Subject: net: linux/skbuff.h: combine SKB_EXTENSIONS + KCOV handling The previous Kconfig patch led to some other build errors as reported by the 0day bot and my own overnight build testing. These are all in when KCOV is enabled but SKB_EXTENSIONS is not enabled, so fix those by combining those conditions in the header file. Fixes: 6370cc3bbd8a ("net: add kcov handle to skb extensions") Fixes: 85ce50d337d1 ("net: kcov: don't select SKB_EXTENSIONS when there is no NET") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aleksandr Nogikh Cc: Willem de Bruijn Acked-by: Florian Westphal Link: https://lore.kernel.org/r/20201116212108.32465-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2d01b2bbb746..0a1239819fd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4608,7 +4608,7 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } -#ifdef CONFIG_KCOV +#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { @@ -4636,7 +4636,7 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } -#endif /* CONFIG_KCOV */ +#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 172292be01dbd6c26aba23f62e8ec090f31cdb71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:41 +0100 Subject: dma-mapping: remove dma_virt_ops Now that the RDMA core deals with devices that only do DMA mapping in lower layers properly, there is no user for dma_virt_ops and it can be removed. Link: https://lore.kernel.org/r/20201106181941.1878556-11-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..2aaed35b556d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); -extern const struct dma_map_ops dma_virt_ops; - #endif /* _LINUX_DMA_MAPPING_H */ -- cgit v1.2.3 From f73659192b0bdf7bad826587b3530cef43cc048d Mon Sep 17 00:00:00 2001 From: Xie He Date: Sat, 14 Nov 2020 07:09:21 -0800 Subject: net: wan: Delete the DLCI / SDLA drivers The DLCI driver (dlci.c) implements the Frame Relay protocol. However, we already have another newer and better implementation of Frame Relay provided by the HDLC_FR driver (hdlc_fr.c). The DLCI driver's implementation of Frame Relay is used by only one hardware driver in the kernel - the SDLA driver (sdla.c). The SDLA driver provides Frame Relay support for the Sangoma S50x devices. However, the vendor provides their own driver (along with their own multi-WAN-protocol implementations including Frame Relay), called WANPIPE. I believe most users of the hardware would use the vendor-provided WANPIPE driver instead. (The WANPIPE driver was even once in the kernel, but was deleted in commit 8db60bcf3021 ("[WAN]: Remove broken and unmaintained Sangoma drivers.") because the vendor no longer updated the in-kernel WANPIPE driver.) Cc: Mike McLagan Signed-off-by: Xie He Link: https://lore.kernel.org/r/20201114150921.685594-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/if_frad.h | 92 ------------------- include/linux/sdla.h | 240 ------------------------------------------------ 2 files changed, 332 deletions(-) delete mode 100644 include/linux/if_frad.h delete mode 100644 include/linux/sdla.h (limited to 'include/linux') diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h deleted file mode 100644 index 52224de798aa..000000000000 --- a/include/linux/if_frad.h +++ /dev/null @@ -1,92 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * DLCI/FRAD Definitions for Frame Relay Access Devices. DLCI devices are - * created for each DLCI associated with a FRAD. The FRAD driver - * is not truly a network device, but the lower level device - * handler. This allows other FRAD manufacturers to use the DLCI - * code, including its RFC1490 encapsulation alongside the current - * implementation for the Sangoma cards. - * - * Version: @(#)if_ifrad.h 0.15 31 Mar 96 - * - * Author: Mike McLagan - * - * Changes: - * 0.15 Mike McLagan changed structure defs (packed) - * re-arranged flags - * added DLCI_RET vars - */ -#ifndef _FRAD_H_ -#define _FRAD_H_ - -#include - - -#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE) - -/* these are the fields of an RFC 1490 header */ -struct frhdr -{ - unsigned char control; - - /* for IP packets, this can be the NLPID */ - unsigned char pad; - - unsigned char NLPID; - unsigned char OUI[3]; - __be16 PID; - -#define IP_NLPID pad -} __packed; - -/* see RFC 1490 for the definition of the following */ -#define FRAD_I_UI 0x03 - -#define FRAD_P_PADDING 0x00 -#define FRAD_P_Q933 0x08 -#define FRAD_P_SNAP 0x80 -#define FRAD_P_CLNP 0x81 -#define FRAD_P_IP 0xCC - -struct dlci_local -{ - struct net_device *master; - struct net_device *slave; - struct dlci_conf config; - int configured; - struct list_head list; - - /* callback function */ - void (*receive)(struct sk_buff *skb, struct net_device *); -}; - -struct frad_local -{ - /* devices which this FRAD is slaved to */ - struct net_device *master[CONFIG_DLCI_MAX]; - short dlci[CONFIG_DLCI_MAX]; - - struct frad_conf config; - int configured; /* has this device been configured */ - int initialized; /* mem_start, port, irq set ? */ - - /* callback functions */ - int (*activate)(struct net_device *, struct net_device *); - int (*deactivate)(struct net_device *, struct net_device *); - int (*assoc)(struct net_device *, struct net_device *); - int (*deassoc)(struct net_device *, struct net_device *); - int (*dlci_conf)(struct net_device *, struct net_device *, int get); - - /* fields that are used by the Sangoma SDLA cards */ - struct timer_list timer; - struct net_device *dev; - int type; /* adapter type */ - int state; /* state of the S502/8 control latch */ - int buffer; /* current buffer for S508 firmware */ -}; - -#endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ - -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - -#endif diff --git a/include/linux/sdla.h b/include/linux/sdla.h deleted file mode 100644 index 00e8b3b614f0..000000000000 --- a/include/linux/sdla.h +++ /dev/null @@ -1,240 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Global definitions for the Frame relay interface. - * - * Version: @(#)if_ifrad.h 0.20 13 Apr 96 - * - * Author: Mike McLagan - * - * Changes: - * 0.15 Mike McLagan Structure packing - * - * 0.20 Mike McLagan New flags for S508 buffer handling - */ -#ifndef SDLA_H -#define SDLA_H - -#include - - -/* important Z80 window addresses */ -#define SDLA_CONTROL_WND 0xE000 - -#define SDLA_502_CMD_BUF 0xEF60 -#define SDLA_502_RCV_BUF 0xA900 -#define SDLA_502_TXN_AVAIL 0xFFF1 -#define SDLA_502_RCV_AVAIL 0xFFF2 -#define SDLA_502_EVENT_FLAGS 0xFFF3 -#define SDLA_502_MDM_STATUS 0xFFF4 -#define SDLA_502_IRQ_INTERFACE 0xFFFD -#define SDLA_502_IRQ_PERMISSION 0xFFFE -#define SDLA_502_DATA_OFS 0x0010 - -#define SDLA_508_CMD_BUF 0xE000 -#define SDLA_508_TXBUF_INFO 0xF100 -#define SDLA_508_RXBUF_INFO 0xF120 -#define SDLA_508_EVENT_FLAGS 0xF003 -#define SDLA_508_MDM_STATUS 0xF004 -#define SDLA_508_IRQ_INTERFACE 0xF010 -#define SDLA_508_IRQ_PERMISSION 0xF011 -#define SDLA_508_TSE_OFFSET 0xF012 - -/* Event flags */ -#define SDLA_EVENT_STATUS 0x01 -#define SDLA_EVENT_DLCI_STATUS 0x02 -#define SDLA_EVENT_BAD_DLCI 0x04 -#define SDLA_EVENT_LINK_DOWN 0x40 - -/* IRQ Trigger flags */ -#define SDLA_INTR_RX 0x01 -#define SDLA_INTR_TX 0x02 -#define SDLA_INTR_MODEM 0x04 -#define SDLA_INTR_COMPLETE 0x08 -#define SDLA_INTR_STATUS 0x10 -#define SDLA_INTR_TIMER 0x20 - -/* DLCI status bits */ -#define SDLA_DLCI_DELETED 0x01 -#define SDLA_DLCI_ACTIVE 0x02 -#define SDLA_DLCI_WAITING 0x04 -#define SDLA_DLCI_NEW 0x08 -#define SDLA_DLCI_INCLUDED 0x40 - -/* valid command codes */ -#define SDLA_INFORMATION_WRITE 0x01 -#define SDLA_INFORMATION_READ 0x02 -#define SDLA_ISSUE_IN_CHANNEL_SIGNAL 0x03 -#define SDLA_SET_DLCI_CONFIGURATION 0x10 -#define SDLA_READ_DLCI_CONFIGURATION 0x11 -#define SDLA_DISABLE_COMMUNICATIONS 0x12 -#define SDLA_ENABLE_COMMUNICATIONS 0x13 -#define SDLA_READ_DLC_STATUS 0x14 -#define SDLA_READ_DLC_STATISTICS 0x15 -#define SDLA_FLUSH_DLC_STATISTICS 0x16 -#define SDLA_LIST_ACTIVE_DLCI 0x17 -#define SDLA_FLUSH_INFORMATION_BUFFERS 0x18 -#define SDLA_ADD_DLCI 0x20 -#define SDLA_DELETE_DLCI 0x21 -#define SDLA_ACTIVATE_DLCI 0x22 -#define SDLA_DEACTIVATE_DLCI 0x23 -#define SDLA_READ_MODEM_STATUS 0x30 -#define SDLA_SET_MODEM_STATUS 0x31 -#define SDLA_READ_COMMS_ERR_STATS 0x32 -#define SDLA_FLUSH_COMMS_ERR_STATS 0x33 -#define SDLA_READ_CODE_VERSION 0x40 -#define SDLA_SET_IRQ_TRIGGER 0x50 -#define SDLA_GET_IRQ_TRIGGER 0x51 - -/* In channel signal types */ -#define SDLA_ICS_LINK_VERIFY 0x02 -#define SDLA_ICS_STATUS_ENQ 0x03 - -/* modem status flags */ -#define SDLA_MODEM_DTR_HIGH 0x01 -#define SDLA_MODEM_RTS_HIGH 0x02 -#define SDLA_MODEM_DCD_HIGH 0x08 -#define SDLA_MODEM_CTS_HIGH 0x20 - -/* used for RET_MODEM interpretation */ -#define SDLA_MODEM_DCD_LOW 0x01 -#define SDLA_MODEM_CTS_LOW 0x02 - -/* return codes */ -#define SDLA_RET_OK 0x00 -#define SDLA_RET_COMMUNICATIONS 0x01 -#define SDLA_RET_CHANNEL_INACTIVE 0x02 -#define SDLA_RET_DLCI_INACTIVE 0x03 -#define SDLA_RET_DLCI_CONFIG 0x04 -#define SDLA_RET_BUF_TOO_BIG 0x05 -#define SDLA_RET_NO_DATA 0x05 -#define SDLA_RET_BUF_OVERSIZE 0x06 -#define SDLA_RET_CIR_OVERFLOW 0x07 -#define SDLA_RET_NO_BUFS 0x08 -#define SDLA_RET_TIMEOUT 0x0A -#define SDLA_RET_MODEM 0x10 -#define SDLA_RET_CHANNEL_OFF 0x11 -#define SDLA_RET_CHANNEL_ON 0x12 -#define SDLA_RET_DLCI_STATUS 0x13 -#define SDLA_RET_DLCI_UNKNOWN 0x14 -#define SDLA_RET_COMMAND_INVALID 0x1F - -/* Configuration flags */ -#define SDLA_DIRECT_RECV 0x0080 -#define SDLA_TX_NO_EXCEPT 0x0020 -#define SDLA_NO_ICF_MSGS 0x1000 -#define SDLA_TX50_RX50 0x0000 -#define SDLA_TX70_RX30 0x2000 -#define SDLA_TX30_RX70 0x4000 - -/* IRQ selection flags */ -#define SDLA_IRQ_RECEIVE 0x01 -#define SDLA_IRQ_TRANSMIT 0x02 -#define SDLA_IRQ_MODEM_STAT 0x04 -#define SDLA_IRQ_COMMAND 0x08 -#define SDLA_IRQ_CHANNEL 0x10 -#define SDLA_IRQ_TIMER 0x20 - -/* definitions for PC memory mapping */ -#define SDLA_8K_WINDOW 0x01 -#define SDLA_S502_SEG_A 0x10 -#define SDLA_S502_SEG_C 0x20 -#define SDLA_S502_SEG_D 0x00 -#define SDLA_S502_SEG_E 0x30 -#define SDLA_S507_SEG_A 0x00 -#define SDLA_S507_SEG_B 0x40 -#define SDLA_S507_SEG_C 0x80 -#define SDLA_S507_SEG_E 0xC0 -#define SDLA_S508_SEG_A 0x00 -#define SDLA_S508_SEG_C 0x10 -#define SDLA_S508_SEG_D 0x08 -#define SDLA_S508_SEG_E 0x18 - -/* SDLA adapter port constants */ -#define SDLA_IO_EXTENTS 0x04 - -#define SDLA_REG_CONTROL 0x00 -#define SDLA_REG_PC_WINDOW 0x01 /* offset for PC window select latch */ -#define SDLA_REG_Z80_WINDOW 0x02 /* offset for Z80 window select latch */ -#define SDLA_REG_Z80_CONTROL 0x03 /* offset for Z80 control latch */ - -#define SDLA_S502_STS 0x00 /* status reg for 502, 502E, 507 */ -#define SDLA_S508_GNRL 0x00 /* general purp. reg for 508 */ -#define SDLA_S508_STS 0x01 /* status reg for 508 */ -#define SDLA_S508_IDR 0x02 /* ID reg for 508 */ - -/* control register flags */ -#define SDLA_S502A_START 0x00 /* start the CPU */ -#define SDLA_S502A_INTREQ 0x02 -#define SDLA_S502A_INTEN 0x04 -#define SDLA_S502A_HALT 0x08 /* halt the CPU */ -#define SDLA_S502A_NMI 0x10 /* issue an NMI to the CPU */ - -#define SDLA_S502E_CPUEN 0x01 -#define SDLA_S502E_ENABLE 0x02 -#define SDLA_S502E_INTACK 0x04 - -#define SDLA_S507_ENABLE 0x01 -#define SDLA_S507_IRQ3 0x00 -#define SDLA_S507_IRQ4 0x20 -#define SDLA_S507_IRQ5 0x40 -#define SDLA_S507_IRQ7 0x60 -#define SDLA_S507_IRQ10 0x80 -#define SDLA_S507_IRQ11 0xA0 -#define SDLA_S507_IRQ12 0xC0 -#define SDLA_S507_IRQ15 0xE0 - -#define SDLA_HALT 0x00 -#define SDLA_CPUEN 0x02 -#define SDLA_MEMEN 0x04 -#define SDLA_S507_EPROMWR 0x08 -#define SDLA_S507_EPROMCLK 0x10 -#define SDLA_S508_INTRQ 0x08 -#define SDLA_S508_INTEN 0x10 - -struct sdla_cmd { - char opp_flag; - char cmd; - short length; - char retval; - short dlci; - char flags; - short rxlost_int; - long rxlost_app; - char reserve[2]; - char data[SDLA_MAX_DATA]; /* transfer data buffer */ -} __attribute__((packed)); - -struct intr_info { - char flags; - short txlen; - char irq; - char flags2; - short timeout; -} __attribute__((packed)); - -/* found in the 508's control window at RXBUF_INFO */ -struct buf_info { - unsigned short rse_num; - unsigned long rse_base; - unsigned long rse_next; - unsigned long buf_base; - unsigned short reserved; - unsigned long buf_top; -} __attribute__((packed)); - -/* structure pointed to by rse_base in RXBUF_INFO struct */ -struct buf_entry { - char opp_flag; - short length; - short dlci; - char flags; - short timestamp; - short reserved[2]; - long buf_addr; -} __attribute__((packed)); - -#endif -- cgit v1.2.3 From 270f3385cddf5db3799b393459476bd9abff89f1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:17:59 +0100 Subject: net: core: fix some kernel-doc markups Some identifiers have different names between their prototypes and the kernel-doc markup. In the specific case of netif_subqueue_stopped(), keep the current markup for __netif_subqueue_stopped(), adding a new one for netif_subqueue_stopped(). Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ce648a564f7..03433a4c929e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1490,7 +1490,7 @@ struct net_device_ops { }; /** - * enum net_device_priv_flags - &struct net_device priv_flags + * enum netdev_priv_flags - &struct net_device priv_flags * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to @@ -3602,7 +3602,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) } /** - * netif_subqueue_stopped - test status of subqueue + * __netif_subqueue_stopped - test status of subqueue * @dev: network device * @queue_index: sub queue index * @@ -3616,6 +3616,13 @@ static inline bool __netif_subqueue_stopped(const struct net_device *dev, return netif_tx_queue_stopped(txq); } +/** + * netif_subqueue_stopped - test status of subqueue + * @dev: network device + * @skb: sub queue buffer pointer + * + * Check individual transmit queue of a device with multiple transmit queues. + */ static inline bool netif_subqueue_stopped(const struct net_device *dev, struct sk_buff *skb) { -- cgit v1.2.3 From ed5298c7d500abaf34ed7783969e953a1f028e5b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 28 Sep 2020 09:39:50 +0530 Subject: bus: mhi: Remove auto-start option There is really no point having an auto-start for channels. This is confusing for the device drivers, some have to enable the channels, others don't have... and waste resources (e.g. pre allocated buffers) that may never be used. This is really up to the MHI device(channel) driver to manage the state of its channels. While at it, let's also remove the auto-start option from ath11k mhi controller. Signed-off-by: Loic Poulain Acked-by: Kalle Valo Reviewed-by: Manivannan Sadhasivam [mani: clubbed ath11k change] Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..6522a4adc794 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -214,7 +214,6 @@ enum mhi_db_brst_mode { * @offload_channel: The client manages the channel completely * @doorbell_mode_switch: Channel switches to doorbell mode on M0 transition * @auto_queue: Framework will automatically queue buffers for DL traffic - * @auto_start: Automatically start (open) this channel * @wake-capable: Channel capable of waking up the system */ struct mhi_channel_config { @@ -232,7 +231,6 @@ struct mhi_channel_config { bool offload_channel; bool doorbell_mode_switch; bool auto_queue; - bool auto_start; bool wake_capable; }; -- cgit v1.2.3 From 16fee29b07358293f135759d9fdbf1267da57ebd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 17:02:17 +0100 Subject: dma-mapping: remove the dma_direct_set_offset export Drop the dma_direct_set_offset export and move the declaration to dma-map-ops.h now that the Allwinner drivers have stopped calling it. Signed-off-by: Christoph Hellwig Signed-off-by: Maxime Ripard --- include/linux/dma-map-ops.h | 3 +++ include/linux/dma-mapping.h | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index a5f89fc4d6df..03925e438ec3 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -226,6 +226,9 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size, bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)); bool dma_free_from_pool(struct device *dev, void *start, size_t size); +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size); + #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H #include #elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..199d85285246 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -558,13 +558,6 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -/* - * Legacy interface to set up the dma offset map. Drivers really should not - * actually use it, but we have a few legacy cases left. - */ -int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, - dma_addr_t dma_start, u64 size); - extern const struct dma_map_ops dma_virt_ops; #endif /* _LINUX_DMA_MAPPING_H */ -- cgit v1.2.3 From 78e1d22687ff1fd320abac12e8a607ea79782c48 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 6 Nov 2020 09:44:47 -0800 Subject: bus: mhi: core: Expose mhi_get_exec_env() API for controllers The mhi_get_exec_env() APIs can be used by the controller drivers to query the execution environment of the MHI device. Expose it so it can be used in some scenarios to determine behavior of controllers. Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..9225d5551d69 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -658,6 +658,12 @@ int mhi_download_rddm_img(struct mhi_controller *mhi_cntrl, bool in_panic); */ int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl); +/** + * mhi_get_exec_env - Get BHI execution environment of the device + * @mhi_cntrl: MHI controller + */ +enum mhi_ee_type mhi_get_exec_env(struct mhi_controller *mhi_cntrl); + /** * mhi_get_mhi_state - Get MHI state of the device * @mhi_cntrl: MHI controller -- cgit v1.2.3 From 9e1660e5c396ee081907386d0b95b8e0804a6c86 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 6 Nov 2020 09:44:49 -0800 Subject: bus: mhi: core: Rename RDDM download function to use proper words mhi_download_rddm_img() uses a shorter version of the word image. Expand it and rename the function to mhi_download_rddm_image(). Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 9225d5551d69..52b3c60bf9bb 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -645,12 +645,12 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); int mhi_pm_resume(struct mhi_controller *mhi_cntrl); /** - * mhi_download_rddm_img - Download ramdump image from device for - * debugging purpose. + * mhi_download_rddm_image - Download ramdump image from device for + * debugging purpose. * @mhi_cntrl: MHI controller * @in_panic: Download rddm image during kernel panic */ -int mhi_download_rddm_img(struct mhi_controller *mhi_cntrl, bool in_panic); +int mhi_download_rddm_image(struct mhi_controller *mhi_cntrl, bool in_panic); /** * mhi_force_rddm_mode - Force device into rddm mode -- cgit v1.2.3 From 8f70397876872789b2a5deba804eb6216fb5deb7 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Mon, 9 Nov 2020 12:47:21 -0800 Subject: bus: mhi: core: Move to using high priority workqueue MHI work is currently scheduled on the global/system workqueue and can encounter delays on a stressed system. To avoid those unforeseen delays which can hamper bootup or shutdown times, use a dedicated high priority workqueue instead of the global/system workqueue. Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 52b3c60bf9bb..1ed5f2aa224b 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -337,6 +337,7 @@ struct mhi_controller_config { * @wlock: Lock for protecting device wakeup * @mhi_link_info: Device bandwidth info * @st_worker: State transition worker + * @hiprio_wq: High priority workqueue for MHI work such as state transitions * @state_event: State change event * @status_cb: CB function to notify power states of the device (required) * @wake_get: CB function to assert device wake (optional) @@ -419,6 +420,7 @@ struct mhi_controller { spinlock_t wlock; struct mhi_link_info mhi_link_info; struct work_struct st_worker; + struct workqueue_struct *hiprio_wq; wait_queue_head_t state_event; void (*status_cb)(struct mhi_controller *mhi_cntrl, -- cgit v1.2.3 From 5c62634fc65101d350cbd47722fb76f02693059d Mon Sep 17 00:00:00 2001 From: Hui Su Date: Wed, 18 Nov 2020 00:17:50 +0800 Subject: namespace: make timens_on_fork() return nothing timens_on_fork() always return 0, and maybe not need to judge the return value in copy_namespaces(). So make timens_on_fork() return nothing and do not judge its return val in copy_namespaces(). Signed-off-by: Hui Su Link: https://lore.kernel.org/r/20201117161750.GA45121@rlk Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 68770ac9ba89..30312166e70a 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -45,7 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags, struct user_namespace *user_ns, struct time_namespace *old_ns); void free_time_ns(struct kref *kref); -int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); +void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); static inline void put_time_ns(struct time_namespace *ns) @@ -136,10 +136,10 @@ struct time_namespace *copy_time_ns(unsigned long flags, return old_ns; } -static inline int timens_on_fork(struct nsproxy *nsproxy, +static inline void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) { - return 0; + return; } static inline void timens_add_monotonic(struct timespec64 *ts) { } -- cgit v1.2.3 From 13d40ff85da8a85935e3fd47061dee71a02af0d4 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Fri, 13 Nov 2020 12:24:56 -0800 Subject: usb: typec: Correct the bit values for the Thunderbolt rounded/non-rounded cable support Rounded and non-rounded Thunderbolt cables are represented by two bits as per USB Type-C Connector specification v2.0 section F.2.6. Corrected that in the Thunderbolt 3 cable discover mode VDO. Signed-off-by: Utkarsh Patel Reviewed-by: Heikki Krogerus -- Changes in v2: - Removed the fixes tag as there is no functional implication. -- Link: https://lore.kernel.org/r/20201113202503.6559-2-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 47c2d501ddce..aad648d14bb3 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -40,11 +40,16 @@ struct typec_thunderbolt_data { #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 #define TBT_CABLE_ROUNDED BIT(19) +#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ + (((_vdo_) & GENMASK(20, 19)) >> 19) +#define TBT_GEN3_NON_ROUNDED 0 +#define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) +#define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) /* TBT3 Device Enter Mode VDO bits */ #define TBT_ENTER_MODE_CABLE_SPEED(s) TBT_SET_CABLE_SPEED(s) -- cgit v1.2.3 From 523a97aa3b756c1e0efe33ae48d450f8b0052073 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Fri, 13 Nov 2020 12:24:59 -0800 Subject: usb: typec: Remove one bit support for the Thunderbolt rounded/non-rounded cable Two bits support for the Thunderbolt rounded/non-rounded cable has been added to the header file. Hence, removing unused TBT_CABLE_ROUNDED definition from the header file. Signed-off-by: Utkarsh Patel Reviewed-by: Heikki Krogerus -- changes in v2: - Removed the fixes tag as there is no functional implication. -- Link: https://lore.kernel.org/r/20201113202503.6559-5-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index aad648d14bb3..63dd44b72e0c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -39,7 +39,6 @@ struct typec_thunderbolt_data { #define TBT_CABLE_USB3_GEN1 1 #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 -#define TBT_CABLE_ROUNDED BIT(19) #define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ (((_vdo_) & GENMASK(20, 19)) >> 19) #define TBT_GEN3_NON_ROUNDED 0 -- cgit v1.2.3 From 8a5ca78f603977d6b9b2d7dcb3b1b6ef601d3cc7 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 16 Nov 2020 12:11:38 -0800 Subject: usb: pd: Add captive Type C cable type The USB Power Delivery Specification R3.0 adds a captive cable type to the "USB Type-C plug to USB Type-C/Captive" field (Bits 19-18, Passive/Active Cable VDO, Table 6-38 & 6-39). Add the corresponding definition to the Cable VDO header. Also add a helper macro to get the Type C cable connector type, when provided the cable VDO. Cc: Heikki Krogerus Signed-off-by: Prashant Malani Reviewed-by: Benson Leung Reviewed-by: Greg Kroah-Hartman Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201116201150.2919178-2-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_vdo.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index 68bdc4e2f5a9..8c5cb5830754 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -177,7 +177,7 @@ * <31:28> :: Cable HW version * <27:24> :: Cable FW version * <23:20> :: Reserved, Shall be set to zero - * <19:18> :: type-C to Type-A/B/C (00b == A, 01 == B, 10 == C) + * <19:18> :: type-C to Type-A/B/C/Captive (00b == A, 01 == B, 10 == C, 11 == Captive) * <17> :: Type-C to Plug/Receptacle (0b == plug, 1b == receptacle) * <16:13> :: cable latency (0001 == <10ns(~1m length)) * <12:11> :: cable termination type (11b == both ends active VCONN req) @@ -193,6 +193,7 @@ #define CABLE_ATYPE 0 #define CABLE_BTYPE 1 #define CABLE_CTYPE 2 +#define CABLE_CAPTIVE 3 #define CABLE_PLUG 0 #define CABLE_RECEPTACLE 1 #define CABLE_CURR_1A5 0 @@ -208,6 +209,7 @@ | (tx1d) << 10 | (tx2d) << 9 | (rx1d) << 8 | (rx2d) << 7 \ | ((cur) & 0x3) << 5 | (vps) << 4 | (sopp) << 3 \ | ((usbss) & 0x7)) +#define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* * AMA VDO -- cgit v1.2.3 From a0ccdc4a77a1b36b682ae60361879eca0a0f88d6 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 16 Nov 2020 12:11:40 -0800 Subject: usb: typec: Add number of altmodes partner attr Add a user-visible attribute for the number of alternate modes available in a partner. This allows userspace to determine whether there are any remaining alternate modes left to be registered by the kernel driver. It can begin executing any policy state machine after all available alternate modes have been registered with the connector class framework. This value is set to "-1" initially, signifying that a valid number of alternate modes haven't been set for the partner. Also add a sysfs file which exposes this attribute. The file remains hidden as long as the attribute value is -1. Cc: Benson Leung Cc: Heikki Krogerus Signed-off-by: Prashant Malani Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201116201150.2919178-3-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 6be558045942..bc6b1a71cb8a 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -126,6 +126,7 @@ struct typec_altmode_desc { enum typec_port_data roles; }; +int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmodes); struct typec_altmode *typec_partner_register_altmode(struct typec_partner *partner, const struct typec_altmode_desc *desc); -- cgit v1.2.3 From a30a00e37ceb094f949e4d96c2c586e6503b5d1d Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 28 Oct 2020 23:31:32 -0700 Subject: usb: typec: tcpm: frs sourcing vbus callback During FRS hardware autonomously starts to source vbus. Provide callback to perform chip specific operations. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201029063138.1429760-5-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 09762d26fa0c..7303f518ba49 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -83,6 +83,9 @@ enum tcpm_transmit_type { * Optional; Called to enable/disable PD 3.0 fast role swap. * Enabling frs is accessory dependent as not all PD3.0 * accessories support fast role swap. + * @frs_sourcing_vbus: + * Optional; Called to notify that vbus is now being sourced. + * Low level drivers can perform chip specific operations, if any. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -109,6 +112,7 @@ struct tcpc_dev { const struct pd_message *msg); int (*set_bist_data)(struct tcpc_dev *dev, bool on); int (*enable_frs)(struct tcpc_dev *dev, bool enable); + void (*frs_sourcing_vbus)(struct tcpc_dev *dev); }; struct tcpm_port; -- cgit v1.2.3 From f321a02caebdd0c56e167610cda2fa148cd96e8b Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 28 Oct 2020 23:31:35 -0700 Subject: usb: typec: tcpm: Implement enabling Auto Discharge disconnect support TCPCI spec allows TCPC hardware to autonomously discharge the vbus capacitance upon disconnect. The expectation is that the TCPM enables AutoDischargeDisconnect while entering SNK/SRC_ATTACHED states. Hardware then automously discharges vbus when the vbus falls below a certain threshold i.e. VBUS_SINK_DISCONNECT_THRESHOLD. Apart from enabling the vbus discharge circuit, AutoDischargeDisconnect is also used a flag to move TCPCI based TCPC implementations into Attached.Snk/Attached.Src state as mentioned in Figure 4-15. TCPC State Diagram before a Connection of the USB Type-C Port Controller Interface Specification. In such TCPC implementations, setting AutoDischargeDisconnect would prevent TCPC into entering "Connection_Invalid" state as well. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201029063138.1429760-8-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 7303f518ba49..e68aaa12886f 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -86,6 +86,18 @@ enum tcpm_transmit_type { * @frs_sourcing_vbus: * Optional; Called to notify that vbus is now being sourced. * Low level drivers can perform chip specific operations, if any. + * @enable_auto_vbus_discharge: + * Optional; TCPCI spec based TCPC implementations can optionally + * support hardware to autonomously dischrge vbus upon disconnecting + * as sink or source. TCPM signals TCPC to enable the mechanism upon + * entering connected state and signals disabling upon disconnect. + * @set_auto_vbus_discharge_threshold: + * Mandatory when enable_auto_vbus_discharge is implemented. TCPM + * calls this function to allow lower levels drivers to program the + * vbus threshold voltage below which the vbus discharge circuit + * will be turned on. requested_vbus_voltage is set to 0 when vbus + * is going to disappear knowingly i.e. during PR_SWAP and + * HARD_RESET etc. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -113,6 +125,9 @@ struct tcpc_dev { int (*set_bist_data)(struct tcpc_dev *dev, bool on); int (*enable_frs)(struct tcpc_dev *dev, bool enable); void (*frs_sourcing_vbus)(struct tcpc_dev *dev); + int (*enable_auto_vbus_discharge)(struct tcpc_dev *dev, bool enable); + int (*set_auto_vbus_discharge_threshold)(struct tcpc_dev *dev, enum typec_pwr_opmode mode, + bool pps_active, u32 requested_vbus_voltage); }; struct tcpm_port; -- cgit v1.2.3 From e1e52361c61afdf81d81cfbbfa3ce08971e60f50 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 16 Nov 2020 12:11:42 -0800 Subject: usb: typec: Add plug num_altmodes sysfs attr Add a field to the typec_plug struct to record the number of available altmodes as well as the corresponding sysfs attribute to expose this to userspace. This allows userspace to determine whether there are any remaining alternate modes left to be registered by the kernel driver. It can begin executing any policy state machine after all available alternate modes have been registered with the connector class framework. This value is set to "-1" initially, signifying that a valid number of alternate modes haven't been set for the plug. The sysfs file remains hidden as long as the attribute value is -1. We re-use the partner attribute for number_of_alternate_modes since the usage and name is similar, and update the corresponding *_show() command to support both partner and plugs. Signed-off-by: Prashant Malani Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201116201150.2919178-4-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index bc6b1a71cb8a..54475323f83b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -130,6 +130,7 @@ int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmod struct typec_altmode *typec_partner_register_altmode(struct typec_partner *partner, const struct typec_altmode_desc *desc); +int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes); struct typec_altmode *typec_plug_register_altmode(struct typec_plug *plug, const struct typec_altmode_desc *desc); -- cgit v1.2.3 From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 10 Nov 2020 15:19:08 +0800 Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system "intel_iommu=off" command line is used to disable iommu but iommu is force enabled in a tboot system for security reason. However for better performance on high speed network device, a new option "intel_iommu=tboot_noforce" is introduced to disable the force on. By default kernel should panic if iommu init fail in tboot for security reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off". Fix the code setting force_on and move intel_iommu_tboot_noforce from tboot code to intel iommu code. Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on") Signed-off-by: Zhenzhong Duan Tested-by: Lukasz Hawrylko Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com Signed-off-by: Will Deacon --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fbf5b3e7707e..d956987ed032 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -- cgit v1.2.3 From d04a53b1c48766665806eb75b73137734abdaa95 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 17 Nov 2020 22:38:26 +0100 Subject: ptp: document struct ptp_clock_request members It's arguable most people interested in configuring a PPS signal want it as external output, not as kernel input. PTP_CLK_REQ_PPS is for input though. Add documentation to nudge readers into the correct direction. Signed-off-by: Ahmad Fatoum Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20201117213826.18235-1-a.fatoum@pengutronix.de Signed-off-by: Jakub Kicinski --- include/linux/ptp_clock_kernel.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index d3e8ba5c7125..0d47fd33b228 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -12,6 +12,19 @@ #include #include +/** + * struct ptp_clock_request - request PTP clock event + * + * @type: The type of the request. + * EXTTS: Configure external trigger timestamping + * PEROUT: Configure periodic output signal (e.g. PPS) + * PPS: trigger internal PPS event for input + * into kernel PPS subsystem + * @extts: describes configuration for external trigger timestamping. + * This is only valid when event == PTP_CLK_REQ_EXTTS. + * @perout: describes configuration for periodic output. + * This is only valid when event == PTP_CLK_REQ_PEROUT. + */ struct ptp_clock_request { enum { -- cgit v1.2.3 From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Tue, 17 Nov 2020 18:45:49 +0000 Subject: bpf: Add bpf_ktime_get_coarse_ns helper The helper uses CLOCK_MONOTONIC_COARSE source of time that is less accurate but more performant. We have a BPF CGROUP_SKB firewall that supports event logging through bpf_perf_event_output(). Each event has a timestamp and currently we use bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20 ns in time required for event logging. bpf_ktime_get_ns(): EgressLogByRemoteEndpoint 113.82ns 8.79M bpf_ktime_get_coarse_ns(): EgressLogByRemoteEndpoint 95.40ns 10.48M Signed-off-by: Dmitrii Banshchikov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 581b2a2e78eb..e1bcb6d7345c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1842,6 +1842,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From f2bcc2fa275b913093ff5b403be5d11342fdb055 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 16 Nov 2020 17:21:15 +0100 Subject: atm: nicstar: Replace in_interrupt() usage push_scqe() uses in_interrupt() to figure out if it is allowed to sleep. The usage of in_interrupt() in drivers is phased out and Linus clearly requested that code which changes behaviour depending on context should either be separated or the context be conveyed in an argument passed by the caller, which usually knows the context. Aside of that in_interrupt() is not correct as it does not catch preempt disabled regions which neither can sleep. ns_send() (the only caller of push_scqe()) has the following callers: - vcc_sendmsg() used as proto_ops::sendmsg is expected to be invoked in preemtible context. -> vcc->dev->ops->send() (ns_send()) - atm_vcc::send via atmdev_ops::send either directly (pointer copied by atm_init_aal34() or atm_init_aal5()) or via atm_send_aal0(). This is invoked by drivers (like br2684, clip, pppoatm, ...) which are called from net_device_ops::ndo_start_xmit with BH disabled. Add atmdev_ops::send_bh which is used by callers from BH context (atm_send_aal*()) and if this callback missing then ::send is used instead. Implement this callback in nicstar and use it to replace in_interrupt(). Cc: Chas Williams <3chas3@gmail.com> Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 5d5ff2203fa2..d7493016cd46 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -186,6 +186,7 @@ struct atmdev_ops { /* only send is required */ void __user *arg); #endif int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); + int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); void (*phy_put)(struct atm_dev *dev,unsigned char value, unsigned long addr); -- cgit v1.2.3 From 4abb1e5b63ac3281275315fc6b0cde0b9c2e2e42 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:17 +0100 Subject: powerpc/mm: factor out creating/removing linear mapping We want to stop abusing memory hotplug infrastructure in memtrace code to perform allocations and remove the linear mapping. Instead we will use alloc_contig_pages() and remove the linear mapping manually. Let's factor out creating/removing the linear mapping into arch_create_linear_mapping() / arch_remove_linear_mapping() - so in the future, we might be able to have whole arch_add_memory() / arch_remove_memory() be implemented in common code. Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-4-david@redhat.com --- include/linux/memory_hotplug.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..00b9e9bd3850 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -375,6 +375,9 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, unsigned long nr_pages); +extern int arch_create_linear_mapping(int nid, u64 start, u64 size, + struct mhp_params *params); +void arch_remove_linear_mapping(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit v1.2.3 From 3b95bc57c86b064fd140ccec3642ad14f40b687f Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 18 Nov 2020 22:49:27 -0800 Subject: Input: adp5589-keys - remove setup/teardown hooks for gpios This is currently just dead code. It's from around a time when platform-data was used, and a board could hook it's own special callback for setup/teardown, and a private object (via 'context'). This change removes it, as there are no more users in mainline for this. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201112074308.71351-4-alexandru.ardelean@analog.com Signed-off-by: Dmitry Torokhov --- include/linux/input/adp5589.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/adp5589.h b/include/linux/input/adp5589.h index c0523af96893..0e4742c8c81e 100644 --- a/include/linux/input/adp5589.h +++ b/include/linux/input/adp5589.h @@ -175,13 +175,6 @@ struct i2c_client; /* forward declaration */ struct adp5589_gpio_platform_data { int gpio_start; /* GPIO Chip base # */ - int (*setup)(struct i2c_client *client, - int gpio, unsigned ngpio, - void *context); - int (*teardown)(struct i2c_client *client, - int gpio, unsigned ngpio, - void *context); - void *context; }; #endif -- cgit v1.2.3 From 86b9d170da98bae13b307d621638954aef645331 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 10 Nov 2020 17:13:37 +0100 Subject: mfd: syscon: Add syscon_regmap_lookup_by_phandle_optional() function. This adds syscon_regmap_lookup_by_phandle_optional() function to get an optional regmap. It behaves the same as syscon_regmap_lookup_by_phandle() except where there is no regmap phandle. In this case, instead of returning -ENODEV, the function returns NULL. This makes error checking simpler when the regmap phandle is optional. Suggested-by: Nicolas Boichat Signed-off-by: Enric Balletbo i Serra Reviewed-by: Matthias Brugger Reviewed-by: Arnd Bergmann Signed-off-by: Lee Jones --- include/linux/mfd/syscon.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index 7f20e9b502a5..fecc2fa2a364 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -28,6 +28,9 @@ extern struct regmap *syscon_regmap_lookup_by_phandle_args( const char *property, int arg_count, unsigned int *out_args); +extern struct regmap *syscon_regmap_lookup_by_phandle_optional( + struct device_node *np, + const char *property); #else static inline struct regmap *device_node_to_regmap(struct device_node *np) { @@ -59,6 +62,14 @@ static inline struct regmap *syscon_regmap_lookup_by_phandle_args( { return ERR_PTR(-ENOTSUPP); } + +static inline struct regmap *syscon_regmap_lookup_by_phandle_optional( + struct device_node *np, + const char *property) +{ + return NULL; +} + #endif #endif /* __LINUX_MFD_SYSCON_H__ */ -- cgit v1.2.3 From 68a90a6c6443b07036ae4a878f6d85bf141471fd Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 25 Sep 2020 10:14:46 +0100 Subject: mfd: madera: Delete register field xxx_WIDTH defines The register field xxx_WIDTH defines are not used in current code. Signed-off-by: Richard Fitzgerald Signed-off-by: Lee Jones --- include/linux/mfd/madera/registers.h | 635 ----------------------------------- 1 file changed, 635 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/madera/registers.h b/include/linux/mfd/madera/registers.h index fe909d177762..b44aeb461d0c 100644 --- a/include/linux/mfd/madera/registers.h +++ b/include/linux/mfd/madera/registers.h @@ -1286,566 +1286,438 @@ /* (0x0000) Software_Reset */ #define MADERA_SW_RST_DEV_ID1_MASK 0xFFFF #define MADERA_SW_RST_DEV_ID1_SHIFT 0 -#define MADERA_SW_RST_DEV_ID1_WIDTH 16 /* (0x0001) Hardware_Revision */ #define MADERA_HW_REVISION_MASK 0x00FF #define MADERA_HW_REVISION_SHIFT 0 -#define MADERA_HW_REVISION_WIDTH 8 /* (0x0020) Tone_Generator_1 */ #define MADERA_TONE2_ENA 0x0002 #define MADERA_TONE2_ENA_MASK 0x0002 #define MADERA_TONE2_ENA_SHIFT 1 -#define MADERA_TONE2_ENA_WIDTH 1 #define MADERA_TONE1_ENA 0x0001 #define MADERA_TONE1_ENA_MASK 0x0001 #define MADERA_TONE1_ENA_SHIFT 0 -#define MADERA_TONE1_ENA_WIDTH 1 /* (0x0021) Tone_Generator_2 */ #define MADERA_TONE1_LVL_0_MASK 0xFFFF #define MADERA_TONE1_LVL_0_SHIFT 0 -#define MADERA_TONE1_LVL_0_WIDTH 16 /* (0x0022) Tone_Generator_3 */ #define MADERA_TONE1_LVL_MASK 0x00FF #define MADERA_TONE1_LVL_SHIFT 0 -#define MADERA_TONE1_LVL_WIDTH 8 /* (0x0023) Tone_Generator_4 */ #define MADERA_TONE2_LVL_0_MASK 0xFFFF #define MADERA_TONE2_LVL_0_SHIFT 0 -#define MADERA_TONE2_LVL_0_WIDTH 16 /* (0x0024) Tone_Generator_5 */ #define MADERA_TONE2_LVL_MASK 0x00FF #define MADERA_TONE2_LVL_SHIFT 0 -#define MADERA_TONE2_LVL_WIDTH 8 /* (0x0030) PWM_Drive_1 */ #define MADERA_PWM2_ENA 0x0002 #define MADERA_PWM2_ENA_MASK 0x0002 #define MADERA_PWM2_ENA_SHIFT 1 -#define MADERA_PWM2_ENA_WIDTH 1 #define MADERA_PWM1_ENA 0x0001 #define MADERA_PWM1_ENA_MASK 0x0001 #define MADERA_PWM1_ENA_SHIFT 0 -#define MADERA_PWM1_ENA_WIDTH 1 /* (0x00A0) Comfort_Noise_Generator */ #define MADERA_NOISE_GEN_ENA 0x0020 #define MADERA_NOISE_GEN_ENA_MASK 0x0020 #define MADERA_NOISE_GEN_ENA_SHIFT 5 -#define MADERA_NOISE_GEN_ENA_WIDTH 1 #define MADERA_NOISE_GEN_GAIN_MASK 0x001F #define MADERA_NOISE_GEN_GAIN_SHIFT 0 -#define MADERA_NOISE_GEN_GAIN_WIDTH 5 /* (0x0100) Clock_32k_1 */ #define MADERA_CLK_32K_ENA 0x0040 #define MADERA_CLK_32K_ENA_MASK 0x0040 #define MADERA_CLK_32K_ENA_SHIFT 6 -#define MADERA_CLK_32K_ENA_WIDTH 1 #define MADERA_CLK_32K_SRC_MASK 0x0003 #define MADERA_CLK_32K_SRC_SHIFT 0 -#define MADERA_CLK_32K_SRC_WIDTH 2 /* (0x0101) System_Clock_1 */ #define MADERA_SYSCLK_FRAC 0x8000 #define MADERA_SYSCLK_FRAC_MASK 0x8000 #define MADERA_SYSCLK_FRAC_SHIFT 15 -#define MADERA_SYSCLK_FRAC_WIDTH 1 #define MADERA_SYSCLK_FREQ_MASK 0x0700 #define MADERA_SYSCLK_FREQ_SHIFT 8 -#define MADERA_SYSCLK_FREQ_WIDTH 3 #define MADERA_SYSCLK_ENA 0x0040 #define MADERA_SYSCLK_ENA_MASK 0x0040 #define MADERA_SYSCLK_ENA_SHIFT 6 -#define MADERA_SYSCLK_ENA_WIDTH 1 #define MADERA_SYSCLK_SRC_MASK 0x000F #define MADERA_SYSCLK_SRC_SHIFT 0 -#define MADERA_SYSCLK_SRC_WIDTH 4 /* (0x0102) Sample_rate_1 */ #define MADERA_SAMPLE_RATE_1_MASK 0x001F #define MADERA_SAMPLE_RATE_1_SHIFT 0 -#define MADERA_SAMPLE_RATE_1_WIDTH 5 /* (0x0103) Sample_rate_2 */ #define MADERA_SAMPLE_RATE_2_MASK 0x001F #define MADERA_SAMPLE_RATE_2_SHIFT 0 -#define MADERA_SAMPLE_RATE_2_WIDTH 5 /* (0x0104) Sample_rate_3 */ #define MADERA_SAMPLE_RATE_3_MASK 0x001F #define MADERA_SAMPLE_RATE_3_SHIFT 0 -#define MADERA_SAMPLE_RATE_3_WIDTH 5 /* (0x0112) Async_clock_1 */ #define MADERA_ASYNC_CLK_FREQ_MASK 0x0700 #define MADERA_ASYNC_CLK_FREQ_SHIFT 8 -#define MADERA_ASYNC_CLK_FREQ_WIDTH 3 #define MADERA_ASYNC_CLK_ENA 0x0040 #define MADERA_ASYNC_CLK_ENA_MASK 0x0040 #define MADERA_ASYNC_CLK_ENA_SHIFT 6 -#define MADERA_ASYNC_CLK_ENA_WIDTH 1 #define MADERA_ASYNC_CLK_SRC_MASK 0x000F #define MADERA_ASYNC_CLK_SRC_SHIFT 0 -#define MADERA_ASYNC_CLK_SRC_WIDTH 4 /* (0x0113) Async_sample_rate_1 */ #define MADERA_ASYNC_SAMPLE_RATE_1_MASK 0x001F #define MADERA_ASYNC_SAMPLE_RATE_1_SHIFT 0 -#define MADERA_ASYNC_SAMPLE_RATE_1_WIDTH 5 /* (0x0114) Async_sample_rate_2 */ #define MADERA_ASYNC_SAMPLE_RATE_2_MASK 0x001F #define MADERA_ASYNC_SAMPLE_RATE_2_SHIFT 0 -#define MADERA_ASYNC_SAMPLE_RATE_2_WIDTH 5 /* (0x0120) DSP_Clock_1 */ #define MADERA_DSP_CLK_FREQ_LEGACY 0x0700 #define MADERA_DSP_CLK_FREQ_LEGACY_MASK 0x0700 #define MADERA_DSP_CLK_FREQ_LEGACY_SHIFT 8 -#define MADERA_DSP_CLK_FREQ_LEGACY_WIDTH 3 #define MADERA_DSP_CLK_ENA 0x0040 #define MADERA_DSP_CLK_ENA_MASK 0x0040 #define MADERA_DSP_CLK_ENA_SHIFT 6 -#define MADERA_DSP_CLK_ENA_WIDTH 1 #define MADERA_DSP_CLK_SRC 0x000F #define MADERA_DSP_CLK_SRC_MASK 0x000F #define MADERA_DSP_CLK_SRC_SHIFT 0 -#define MADERA_DSP_CLK_SRC_WIDTH 4 /* (0x0122) DSP_Clock_2 */ #define MADERA_DSP_CLK_FREQ_MASK 0x03FF #define MADERA_DSP_CLK_FREQ_SHIFT 0 -#define MADERA_DSP_CLK_FREQ_WIDTH 10 /* (0x0149) Output_system_clock */ #define MADERA_OPCLK_ENA 0x8000 #define MADERA_OPCLK_ENA_MASK 0x8000 #define MADERA_OPCLK_ENA_SHIFT 15 -#define MADERA_OPCLK_ENA_WIDTH 1 #define MADERA_OPCLK_DIV_MASK 0x00F8 #define MADERA_OPCLK_DIV_SHIFT 3 -#define MADERA_OPCLK_DIV_WIDTH 5 #define MADERA_OPCLK_SEL_MASK 0x0007 #define MADERA_OPCLK_SEL_SHIFT 0 -#define MADERA_OPCLK_SEL_WIDTH 3 /* (0x014A) Output_async_clock */ #define MADERA_OPCLK_ASYNC_ENA 0x8000 #define MADERA_OPCLK_ASYNC_ENA_MASK 0x8000 #define MADERA_OPCLK_ASYNC_ENA_SHIFT 15 -#define MADERA_OPCLK_ASYNC_ENA_WIDTH 1 #define MADERA_OPCLK_ASYNC_DIV_MASK 0x00F8 #define MADERA_OPCLK_ASYNC_DIV_SHIFT 3 -#define MADERA_OPCLK_ASYNC_DIV_WIDTH 5 #define MADERA_OPCLK_ASYNC_SEL_MASK 0x0007 #define MADERA_OPCLK_ASYNC_SEL_SHIFT 0 -#define MADERA_OPCLK_ASYNC_SEL_WIDTH 3 /* (0x0171) FLL1_Control_1 */ #define CS47L92_FLL1_REFCLK_SRC_MASK 0xF000 #define CS47L92_FLL1_REFCLK_SRC_SHIFT 12 -#define CS47L92_FLL1_REFCLK_SRC_WIDTH 4 #define MADERA_FLL1_HOLD_MASK 0x0004 #define MADERA_FLL1_HOLD_SHIFT 2 -#define MADERA_FLL1_HOLD_WIDTH 1 #define MADERA_FLL1_FREERUN 0x0002 #define MADERA_FLL1_FREERUN_MASK 0x0002 #define MADERA_FLL1_FREERUN_SHIFT 1 -#define MADERA_FLL1_FREERUN_WIDTH 1 #define MADERA_FLL1_ENA 0x0001 #define MADERA_FLL1_ENA_MASK 0x0001 #define MADERA_FLL1_ENA_SHIFT 0 -#define MADERA_FLL1_ENA_WIDTH 1 /* (0x0172) FLL1_Control_2 */ #define MADERA_FLL1_CTRL_UPD 0x8000 #define MADERA_FLL1_CTRL_UPD_MASK 0x8000 #define MADERA_FLL1_CTRL_UPD_SHIFT 15 -#define MADERA_FLL1_CTRL_UPD_WIDTH 1 #define MADERA_FLL1_N_MASK 0x03FF #define MADERA_FLL1_N_SHIFT 0 -#define MADERA_FLL1_N_WIDTH 10 /* (0x0173) FLL1_Control_3 */ #define MADERA_FLL1_THETA_MASK 0xFFFF #define MADERA_FLL1_THETA_SHIFT 0 -#define MADERA_FLL1_THETA_WIDTH 16 /* (0x0174) FLL1_Control_4 */ #define MADERA_FLL1_LAMBDA_MASK 0xFFFF #define MADERA_FLL1_LAMBDA_SHIFT 0 -#define MADERA_FLL1_LAMBDA_WIDTH 16 /* (0x0175) FLL1_Control_5 */ #define MADERA_FLL1_FRATIO_MASK 0x0F00 #define MADERA_FLL1_FRATIO_SHIFT 8 -#define MADERA_FLL1_FRATIO_WIDTH 4 #define MADERA_FLL1_FB_DIV_MASK 0x03FF #define MADERA_FLL1_FB_DIV_SHIFT 0 -#define MADERA_FLL1_FB_DIV_WIDTH 10 /* (0x0176) FLL1_Control_6 */ #define MADERA_FLL1_REFCLK_DIV_MASK 0x00C0 #define MADERA_FLL1_REFCLK_DIV_SHIFT 6 -#define MADERA_FLL1_REFCLK_DIV_WIDTH 2 #define MADERA_FLL1_REFCLK_SRC_MASK 0x000F #define MADERA_FLL1_REFCLK_SRC_SHIFT 0 -#define MADERA_FLL1_REFCLK_SRC_WIDTH 4 /* (0x0179) FLL1_Control_7 */ #define MADERA_FLL1_GAIN_MASK 0x003c #define MADERA_FLL1_GAIN_SHIFT 2 -#define MADERA_FLL1_GAIN_WIDTH 4 /* (0x017A) FLL1_EFS_2 */ #define MADERA_FLL1_PHASE_GAIN_MASK 0xF000 #define MADERA_FLL1_PHASE_GAIN_SHIFT 12 -#define MADERA_FLL1_PHASE_GAIN_WIDTH 4 #define MADERA_FLL1_PHASE_ENA_MASK 0x0800 #define MADERA_FLL1_PHASE_ENA_SHIFT 11 -#define MADERA_FLL1_PHASE_ENA_WIDTH 1 /* (0x017A) FLL1_Control_10 */ #define MADERA_FLL1_HP_MASK 0xC000 #define MADERA_FLL1_HP_SHIFT 14 -#define MADERA_FLL1_HP_WIDTH 2 #define MADERA_FLL1_PHASEDET_ENA_MASK 0x1000 #define MADERA_FLL1_PHASEDET_ENA_SHIFT 12 -#define MADERA_FLL1_PHASEDET_ENA_WIDTH 1 /* (0x017B) FLL1_Control_11 */ #define MADERA_FLL1_LOCKDET_THR_MASK 0x001E #define MADERA_FLL1_LOCKDET_THR_SHIFT 1 -#define MADERA_FLL1_LOCKDET_THR_WIDTH 4 #define MADERA_FLL1_LOCKDET_MASK 0x0001 #define MADERA_FLL1_LOCKDET_SHIFT 0 -#define MADERA_FLL1_LOCKDET_WIDTH 1 /* (0x017D) FLL1_Digital_Test_1 */ #define MADERA_FLL1_SYNC_EFS_ENA_MASK 0x0100 #define MADERA_FLL1_SYNC_EFS_ENA_SHIFT 8 -#define MADERA_FLL1_SYNC_EFS_ENA_WIDTH 1 #define MADERA_FLL1_CLK_VCO_FAST_SRC_MASK 0x0003 #define MADERA_FLL1_CLK_VCO_FAST_SRC_SHIFT 0 -#define MADERA_FLL1_CLK_VCO_FAST_SRC_WIDTH 2 /* (0x0181) FLL1_Synchroniser_1 */ #define MADERA_FLL1_SYNC_ENA 0x0001 #define MADERA_FLL1_SYNC_ENA_MASK 0x0001 #define MADERA_FLL1_SYNC_ENA_SHIFT 0 -#define MADERA_FLL1_SYNC_ENA_WIDTH 1 /* (0x0182) FLL1_Synchroniser_2 */ #define MADERA_FLL1_SYNC_N_MASK 0x03FF #define MADERA_FLL1_SYNC_N_SHIFT 0 -#define MADERA_FLL1_SYNC_N_WIDTH 10 /* (0x0183) FLL1_Synchroniser_3 */ #define MADERA_FLL1_SYNC_THETA_MASK 0xFFFF #define MADERA_FLL1_SYNC_THETA_SHIFT 0 -#define MADERA_FLL1_SYNC_THETA_WIDTH 16 /* (0x0184) FLL1_Synchroniser_4 */ #define MADERA_FLL1_SYNC_LAMBDA_MASK 0xFFFF #define MADERA_FLL1_SYNC_LAMBDA_SHIFT 0 -#define MADERA_FLL1_SYNC_LAMBDA_WIDTH 16 /* (0x0185) FLL1_Synchroniser_5 */ #define MADERA_FLL1_SYNC_FRATIO_MASK 0x0700 #define MADERA_FLL1_SYNC_FRATIO_SHIFT 8 -#define MADERA_FLL1_SYNC_FRATIO_WIDTH 3 /* (0x0186) FLL1_Synchroniser_6 */ #define MADERA_FLL1_SYNCCLK_DIV_MASK 0x00C0 #define MADERA_FLL1_SYNCCLK_DIV_SHIFT 6 -#define MADERA_FLL1_SYNCCLK_DIV_WIDTH 2 #define MADERA_FLL1_SYNCCLK_SRC_MASK 0x000F #define MADERA_FLL1_SYNCCLK_SRC_SHIFT 0 -#define MADERA_FLL1_SYNCCLK_SRC_WIDTH 4 /* (0x0187) FLL1_Synchroniser_7 */ #define MADERA_FLL1_SYNC_GAIN_MASK 0x003c #define MADERA_FLL1_SYNC_GAIN_SHIFT 2 -#define MADERA_FLL1_SYNC_GAIN_WIDTH 4 #define MADERA_FLL1_SYNC_DFSAT 0x0001 #define MADERA_FLL1_SYNC_DFSAT_MASK 0x0001 #define MADERA_FLL1_SYNC_DFSAT_SHIFT 0 -#define MADERA_FLL1_SYNC_DFSAT_WIDTH 1 /* (0x01D1) FLL_AO_Control_1 */ #define MADERA_FLL_AO_HOLD 0x0004 #define MADERA_FLL_AO_HOLD_MASK 0x0004 #define MADERA_FLL_AO_HOLD_SHIFT 2 -#define MADERA_FLL_AO_HOLD_WIDTH 1 #define MADERA_FLL_AO_FREERUN 0x0002 #define MADERA_FLL_AO_FREERUN_MASK 0x0002 #define MADERA_FLL_AO_FREERUN_SHIFT 1 -#define MADERA_FLL_AO_FREERUN_WIDTH 1 #define MADERA_FLL_AO_ENA 0x0001 #define MADERA_FLL_AO_ENA_MASK 0x0001 #define MADERA_FLL_AO_ENA_SHIFT 0 -#define MADERA_FLL_AO_ENA_WIDTH 1 /* (0x01D2) FLL_AO_Control_2 */ #define MADERA_FLL_AO_CTRL_UPD 0x8000 #define MADERA_FLL_AO_CTRL_UPD_MASK 0x8000 #define MADERA_FLL_AO_CTRL_UPD_SHIFT 15 -#define MADERA_FLL_AO_CTRL_UPD_WIDTH 1 /* (0x01D6) FLL_AO_Control_6 */ #define MADERA_FLL_AO_REFCLK_SRC_MASK 0x000F #define MADERA_FLL_AO_REFCLK_SRC_SHIFT 0 -#define MADERA_FLL_AO_REFCLK_SRC_WIDTH 4 /* (0x0200) Mic_Charge_Pump_1 */ #define MADERA_CPMIC_BYPASS 0x0002 #define MADERA_CPMIC_BYPASS_MASK 0x0002 #define MADERA_CPMIC_BYPASS_SHIFT 1 -#define MADERA_CPMIC_BYPASS_WIDTH 1 #define MADERA_CPMIC_ENA 0x0001 #define MADERA_CPMIC_ENA_MASK 0x0001 #define MADERA_CPMIC_ENA_SHIFT 0 -#define MADERA_CPMIC_ENA_WIDTH 1 /* (0x0210) LDO1_Control_1 */ #define MADERA_LDO1_VSEL_MASK 0x07E0 #define MADERA_LDO1_VSEL_SHIFT 5 -#define MADERA_LDO1_VSEL_WIDTH 6 #define MADERA_LDO1_FAST 0x0010 #define MADERA_LDO1_FAST_MASK 0x0010 #define MADERA_LDO1_FAST_SHIFT 4 -#define MADERA_LDO1_FAST_WIDTH 1 #define MADERA_LDO1_DISCH 0x0004 #define MADERA_LDO1_DISCH_MASK 0x0004 #define MADERA_LDO1_DISCH_SHIFT 2 -#define MADERA_LDO1_DISCH_WIDTH 1 #define MADERA_LDO1_BYPASS 0x0002 #define MADERA_LDO1_BYPASS_MASK 0x0002 #define MADERA_LDO1_BYPASS_SHIFT 1 -#define MADERA_LDO1_BYPASS_WIDTH 1 #define MADERA_LDO1_ENA 0x0001 #define MADERA_LDO1_ENA_MASK 0x0001 #define MADERA_LDO1_ENA_SHIFT 0 -#define MADERA_LDO1_ENA_WIDTH 1 /* (0x0213) LDO2_Control_1 */ #define MADERA_LDO2_VSEL_MASK 0x07E0 #define MADERA_LDO2_VSEL_SHIFT 5 -#define MADERA_LDO2_VSEL_WIDTH 6 #define MADERA_LDO2_FAST 0x0010 #define MADERA_LDO2_FAST_MASK 0x0010 #define MADERA_LDO2_FAST_SHIFT 4 -#define MADERA_LDO2_FAST_WIDTH 1 #define MADERA_LDO2_DISCH 0x0004 #define MADERA_LDO2_DISCH_MASK 0x0004 #define MADERA_LDO2_DISCH_SHIFT 2 -#define MADERA_LDO2_DISCH_WIDTH 1 #define MADERA_LDO2_BYPASS 0x0002 #define MADERA_LDO2_BYPASS_MASK 0x0002 #define MADERA_LDO2_BYPASS_SHIFT 1 -#define MADERA_LDO2_BYPASS_WIDTH 1 #define MADERA_LDO2_ENA 0x0001 #define MADERA_LDO2_ENA_MASK 0x0001 #define MADERA_LDO2_ENA_SHIFT 0 -#define MADERA_LDO2_ENA_WIDTH 1 /* (0x0218) Mic_Bias_Ctrl_1 */ #define MADERA_MICB1_EXT_CAP 0x8000 #define MADERA_MICB1_EXT_CAP_MASK 0x8000 #define MADERA_MICB1_EXT_CAP_SHIFT 15 -#define MADERA_MICB1_EXT_CAP_WIDTH 1 #define MADERA_MICB1_LVL_MASK 0x01E0 #define MADERA_MICB1_LVL_SHIFT 5 -#define MADERA_MICB1_LVL_WIDTH 4 #define MADERA_MICB1_ENA 0x0001 #define MADERA_MICB1_ENA_MASK 0x0001 #define MADERA_MICB1_ENA_SHIFT 0 -#define MADERA_MICB1_ENA_WIDTH 1 /* (0x021C) Mic_Bias_Ctrl_5 */ #define MADERA_MICB1D_ENA 0x1000 #define MADERA_MICB1D_ENA_MASK 0x1000 #define MADERA_MICB1D_ENA_SHIFT 12 -#define MADERA_MICB1D_ENA_WIDTH 1 #define MADERA_MICB1C_ENA 0x0100 #define MADERA_MICB1C_ENA_MASK 0x0100 #define MADERA_MICB1C_ENA_SHIFT 8 -#define MADERA_MICB1C_ENA_WIDTH 1 #define MADERA_MICB1B_ENA 0x0010 #define MADERA_MICB1B_ENA_MASK 0x0010 #define MADERA_MICB1B_ENA_SHIFT 4 -#define MADERA_MICB1B_ENA_WIDTH 1 #define MADERA_MICB1A_ENA 0x0001 #define MADERA_MICB1A_ENA_MASK 0x0001 #define MADERA_MICB1A_ENA_SHIFT 0 -#define MADERA_MICB1A_ENA_WIDTH 1 /* (0x021E) Mic_Bias_Ctrl_6 */ #define MADERA_MICB2D_ENA 0x1000 #define MADERA_MICB2D_ENA_MASK 0x1000 #define MADERA_MICB2D_ENA_SHIFT 12 -#define MADERA_MICB2D_ENA_WIDTH 1 #define MADERA_MICB2C_ENA 0x0100 #define MADERA_MICB2C_ENA_MASK 0x0100 #define MADERA_MICB2C_ENA_SHIFT 8 -#define MADERA_MICB2C_ENA_WIDTH 1 #define MADERA_MICB2B_ENA 0x0010 #define MADERA_MICB2B_ENA_MASK 0x0010 #define MADERA_MICB2B_ENA_SHIFT 4 -#define MADERA_MICB2B_ENA_WIDTH 1 #define MADERA_MICB2A_ENA 0x0001 #define MADERA_MICB2A_ENA_MASK 0x0001 #define MADERA_MICB2A_ENA_SHIFT 0 -#define MADERA_MICB2A_ENA_WIDTH 1 /* (0x0225) - HP Ctrl 1L */ #define MADERA_RMV_SHRT_HP1L 0x4000 #define MADERA_RMV_SHRT_HP1L_MASK 0x4000 #define MADERA_RMV_SHRT_HP1L_SHIFT 14 -#define MADERA_RMV_SHRT_HP1L_WIDTH 1 #define MADERA_HP1L_FLWR 0x0004 #define MADERA_HP1L_FLWR_MASK 0x0004 #define MADERA_HP1L_FLWR_SHIFT 2 -#define MADERA_HP1L_FLWR_WIDTH 1 #define MADERA_HP1L_SHRTI 0x0002 #define MADERA_HP1L_SHRTI_MASK 0x0002 #define MADERA_HP1L_SHRTI_SHIFT 1 -#define MADERA_HP1L_SHRTI_WIDTH 1 #define MADERA_HP1L_SHRTO 0x0001 #define MADERA_HP1L_SHRTO_MASK 0x0001 #define MADERA_HP1L_SHRTO_SHIFT 0 -#define MADERA_HP1L_SHRTO_WIDTH 1 /* (0x0226) - HP Ctrl 1R */ #define MADERA_RMV_SHRT_HP1R 0x4000 #define MADERA_RMV_SHRT_HP1R_MASK 0x4000 #define MADERA_RMV_SHRT_HP1R_SHIFT 14 -#define MADERA_RMV_SHRT_HP1R_WIDTH 1 #define MADERA_HP1R_FLWR 0x0004 #define MADERA_HP1R_FLWR_MASK 0x0004 #define MADERA_HP1R_FLWR_SHIFT 2 -#define MADERA_HP1R_FLWR_WIDTH 1 #define MADERA_HP1R_SHRTI 0x0002 #define MADERA_HP1R_SHRTI_MASK 0x0002 #define MADERA_HP1R_SHRTI_SHIFT 1 -#define MADERA_HP1R_SHRTI_WIDTH 1 #define MADERA_HP1R_SHRTO 0x0001 #define MADERA_HP1R_SHRTO_MASK 0x0001 #define MADERA_HP1R_SHRTO_SHIFT 0 -#define MADERA_HP1R_SHRTO_WIDTH 1 /* (0x0293) Accessory_Detect_Mode_1 */ #define MADERA_ACCDET_SRC 0x2000 #define MADERA_ACCDET_SRC_MASK 0x2000 #define MADERA_ACCDET_SRC_SHIFT 13 -#define MADERA_ACCDET_SRC_WIDTH 1 #define MADERA_ACCDET_POLARITY_INV_ENA 0x0080 #define MADERA_ACCDET_POLARITY_INV_ENA_MASK 0x0080 #define MADERA_ACCDET_POLARITY_INV_ENA_SHIFT 7 -#define MADERA_ACCDET_POLARITY_INV_ENA_WIDTH 1 #define MADERA_ACCDET_MODE_MASK 0x0007 #define MADERA_ACCDET_MODE_SHIFT 0 -#define MADERA_ACCDET_MODE_WIDTH 3 /* (0x0299) Headphone_Detect_0 */ #define MADERA_HPD_GND_SEL 0x0007 #define MADERA_HPD_GND_SEL_MASK 0x0007 #define MADERA_HPD_GND_SEL_SHIFT 0 -#define MADERA_HPD_GND_SEL_WIDTH 3 #define MADERA_HPD_SENSE_SEL 0x00F0 #define MADERA_HPD_SENSE_SEL_MASK 0x00F0 #define MADERA_HPD_SENSE_SEL_SHIFT 4 -#define MADERA_HPD_SENSE_SEL_WIDTH 4 #define MADERA_HPD_FRC_SEL 0x0F00 #define MADERA_HPD_FRC_SEL_MASK 0x0F00 #define MADERA_HPD_FRC_SEL_SHIFT 8 -#define MADERA_HPD_FRC_SEL_WIDTH 4 #define MADERA_HPD_OUT_SEL 0x7000 #define MADERA_HPD_OUT_SEL_MASK 0x7000 #define MADERA_HPD_OUT_SEL_SHIFT 12 -#define MADERA_HPD_OUT_SEL_WIDTH 3 #define MADERA_HPD_OVD_ENA_SEL 0x8000 #define MADERA_HPD_OVD_ENA_SEL_MASK 0x8000 #define MADERA_HPD_OVD_ENA_SEL_SHIFT 15 -#define MADERA_HPD_OVD_ENA_SEL_WIDTH 1 /* (0x029B) Headphone_Detect_1 */ #define MADERA_HP_IMPEDANCE_RANGE_MASK 0x0600 #define MADERA_HP_IMPEDANCE_RANGE_SHIFT 9 -#define MADERA_HP_IMPEDANCE_RANGE_WIDTH 2 #define MADERA_HP_STEP_SIZE 0x0100 #define MADERA_HP_STEP_SIZE_MASK 0x0100 #define MADERA_HP_STEP_SIZE_SHIFT 8 -#define MADERA_HP_STEP_SIZE_WIDTH 1 #define MADERA_HP_CLK_DIV_MASK 0x0018 #define MADERA_HP_CLK_DIV_SHIFT 3 -#define MADERA_HP_CLK_DIV_WIDTH 2 #define MADERA_HP_RATE_MASK 0x0006 #define MADERA_HP_RATE_SHIFT 1 -#define MADERA_HP_RATE_WIDTH 2 #define MADERA_HP_POLL 0x0001 #define MADERA_HP_POLL_MASK 0x0001 #define MADERA_HP_POLL_SHIFT 0 -#define MADERA_HP_POLL_WIDTH 1 /* (0x029C) Headphone_Detect_2 */ #define MADERA_HP_DONE_MASK 0x8000 #define MADERA_HP_DONE_SHIFT 15 -#define MADERA_HP_DONE_WIDTH 1 #define MADERA_HP_LVL_MASK 0x7FFF #define MADERA_HP_LVL_SHIFT 0 -#define MADERA_HP_LVL_WIDTH 15 /* (0x029D) Headphone_Detect_3 */ #define MADERA_HP_DACVAL_MASK 0x03FF #define MADERA_HP_DACVAL_SHIFT 0 -#define MADERA_HP_DACVAL_WIDTH 10 /* (0x029F) - Headphone Detect 5 */ #define MADERA_HP_DACVAL_DOWN_MASK 0x03FF #define MADERA_HP_DACVAL_DOWN_SHIFT 0 -#define MADERA_HP_DACVAL_DOWN_WIDTH 10 /* (0x02A2) Mic_Detect_1_Control_0 */ #define MADERA_MICD1_GND_MASK 0x0007 #define MADERA_MICD1_GND_SHIFT 0 -#define MADERA_MICD1_GND_WIDTH 3 #define MADERA_MICD1_SENSE_MASK 0x00F0 #define MADERA_MICD1_SENSE_SHIFT 4 -#define MADERA_MICD1_SENSE_WIDTH 4 #define MADERA_MICD1_ADC_MODE_MASK 0x8000 #define MADERA_MICD1_ADC_MODE_SHIFT 15 -#define MADERA_MICD1_ADC_MODE_WIDTH 1 /* (0x02A3) Mic_Detect_1_Control_1 */ #define MADERA_MICD_BIAS_STARTTIME_MASK 0xF000 #define MADERA_MICD_BIAS_STARTTIME_SHIFT 12 -#define MADERA_MICD_BIAS_STARTTIME_WIDTH 4 #define MADERA_MICD_RATE_MASK 0x0F00 #define MADERA_MICD_RATE_SHIFT 8 -#define MADERA_MICD_RATE_WIDTH 4 #define MADERA_MICD_BIAS_SRC_MASK 0x00F0 #define MADERA_MICD_BIAS_SRC_SHIFT 4 -#define MADERA_MICD_BIAS_SRC_WIDTH 4 #define MADERA_MICD_DBTIME 0x0002 #define MADERA_MICD_DBTIME_MASK 0x0002 #define MADERA_MICD_DBTIME_SHIFT 1 -#define MADERA_MICD_DBTIME_WIDTH 1 #define MADERA_MICD_ENA 0x0001 #define MADERA_MICD_ENA_MASK 0x0001 #define MADERA_MICD_ENA_SHIFT 0 -#define MADERA_MICD_ENA_WIDTH 1 /* (0x02A4) Mic_Detect_1_Control_2 */ #define MADERA_MICD_LVL_SEL_MASK 0x00FF #define MADERA_MICD_LVL_SEL_SHIFT 0 -#define MADERA_MICD_LVL_SEL_WIDTH 8 /* (0x02A5) Mic_Detect_1_Control_3 */ #define MADERA_MICD_LVL_0 0x0004 @@ -1859,1746 +1731,1341 @@ #define MADERA_MICD_LVL_8 0x0400 #define MADERA_MICD_LVL_MASK 0x07FC #define MADERA_MICD_LVL_SHIFT 2 -#define MADERA_MICD_LVL_WIDTH 9 #define MADERA_MICD_VALID 0x0002 #define MADERA_MICD_VALID_MASK 0x0002 #define MADERA_MICD_VALID_SHIFT 1 -#define MADERA_MICD_VALID_WIDTH 1 #define MADERA_MICD_STS 0x0001 #define MADERA_MICD_STS_MASK 0x0001 #define MADERA_MICD_STS_SHIFT 0 -#define MADERA_MICD_STS_WIDTH 1 /* (0x02AB) Mic_Detect_1_Control_4 */ #define MADERA_MICDET_ADCVAL_DIFF_MASK 0xFF00 #define MADERA_MICDET_ADCVAL_DIFF_SHIFT 8 -#define MADERA_MICDET_ADCVAL_DIFF_WIDTH 8 #define MADERA_MICDET_ADCVAL_MASK 0x007F #define MADERA_MICDET_ADCVAL_SHIFT 0 -#define MADERA_MICDET_ADCVAL_WIDTH 7 /* (0x02C6) Micd_Clamp_control */ #define MADERA_MICD_CLAMP_OVD 0x0010 #define MADERA_MICD_CLAMP_OVD_MASK 0x0010 #define MADERA_MICD_CLAMP_OVD_SHIFT 4 -#define MADERA_MICD_CLAMP_OVD_WIDTH 1 #define MADERA_MICD_CLAMP_MODE_MASK 0x000F #define MADERA_MICD_CLAMP_MODE_SHIFT 0 -#define MADERA_MICD_CLAMP_MODE_WIDTH 4 /* (0x02C8) GP_Switch_1 */ #define MADERA_SW2_MODE_MASK 0x000C #define MADERA_SW2_MODE_SHIFT 2 -#define MADERA_SW2_MODE_WIDTH 2 #define MADERA_SW1_MODE_MASK 0x0003 #define MADERA_SW1_MODE_SHIFT 0 -#define MADERA_SW1_MODE_WIDTH 2 /* (0x02D3) Jack_detect_analogue */ #define MADERA_JD2_ENA 0x0002 #define MADERA_JD2_ENA_MASK 0x0002 #define MADERA_JD2_ENA_SHIFT 1 -#define MADERA_JD2_ENA_WIDTH 1 #define MADERA_JD1_ENA 0x0001 #define MADERA_JD1_ENA_MASK 0x0001 #define MADERA_JD1_ENA_SHIFT 0 -#define MADERA_JD1_ENA_WIDTH 1 /* (0x0300) Input_Enables */ #define MADERA_IN6L_ENA 0x0800 #define MADERA_IN6L_ENA_MASK 0x0800 #define MADERA_IN6L_ENA_SHIFT 11 -#define MADERA_IN6L_ENA_WIDTH 1 #define MADERA_IN6R_ENA 0x0400 #define MADERA_IN6R_ENA_MASK 0x0400 #define MADERA_IN6R_ENA_SHIFT 10 -#define MADERA_IN6R_ENA_WIDTH 1 #define MADERA_IN5L_ENA 0x0200 #define MADERA_IN5L_ENA_MASK 0x0200 #define MADERA_IN5L_ENA_SHIFT 9 -#define MADERA_IN5L_ENA_WIDTH 1 #define MADERA_IN5R_ENA 0x0100 #define MADERA_IN5R_ENA_MASK 0x0100 #define MADERA_IN5R_ENA_SHIFT 8 -#define MADERA_IN5R_ENA_WIDTH 1 #define MADERA_IN4L_ENA 0x0080 #define MADERA_IN4L_ENA_MASK 0x0080 #define MADERA_IN4L_ENA_SHIFT 7 -#define MADERA_IN4L_ENA_WIDTH 1 #define MADERA_IN4R_ENA 0x0040 #define MADERA_IN4R_ENA_MASK 0x0040 #define MADERA_IN4R_ENA_SHIFT 6 -#define MADERA_IN4R_ENA_WIDTH 1 #define MADERA_IN3L_ENA 0x0020 #define MADERA_IN3L_ENA_MASK 0x0020 #define MADERA_IN3L_ENA_SHIFT 5 -#define MADERA_IN3L_ENA_WIDTH 1 #define MADERA_IN3R_ENA 0x0010 #define MADERA_IN3R_ENA_MASK 0x0010 #define MADERA_IN3R_ENA_SHIFT 4 -#define MADERA_IN3R_ENA_WIDTH 1 #define MADERA_IN2L_ENA 0x0008 #define MADERA_IN2L_ENA_MASK 0x0008 #define MADERA_IN2L_ENA_SHIFT 3 -#define MADERA_IN2L_ENA_WIDTH 1 #define MADERA_IN2R_ENA 0x0004 #define MADERA_IN2R_ENA_MASK 0x0004 #define MADERA_IN2R_ENA_SHIFT 2 -#define MADERA_IN2R_ENA_WIDTH 1 #define MADERA_IN1L_ENA 0x0002 #define MADERA_IN1L_ENA_MASK 0x0002 #define MADERA_IN1L_ENA_SHIFT 1 -#define MADERA_IN1L_ENA_WIDTH 1 #define MADERA_IN1R_ENA 0x0001 #define MADERA_IN1R_ENA_MASK 0x0001 #define MADERA_IN1R_ENA_SHIFT 0 -#define MADERA_IN1R_ENA_WIDTH 1 /* (0x0308) Input_Rate */ #define MADERA_IN_RATE_MASK 0xF800 #define MADERA_IN_RATE_SHIFT 11 -#define MADERA_IN_RATE_WIDTH 5 #define MADERA_IN_MODE_MASK 0x0400 #define MADERA_IN_MODE_SHIFT 10 -#define MADERA_IN_MODE_WIDTH 1 /* (0x0309) Input_Volume_Ramp */ #define MADERA_IN_VD_RAMP_MASK 0x0070 #define MADERA_IN_VD_RAMP_SHIFT 4 -#define MADERA_IN_VD_RAMP_WIDTH 3 #define MADERA_IN_VI_RAMP_MASK 0x0007 #define MADERA_IN_VI_RAMP_SHIFT 0 -#define MADERA_IN_VI_RAMP_WIDTH 3 /* (0x030C) HPF_Control */ #define MADERA_IN_HPF_CUT_MASK 0x0007 #define MADERA_IN_HPF_CUT_SHIFT 0 -#define MADERA_IN_HPF_CUT_WIDTH 3 /* (0x0310) IN1L_Control */ #define MADERA_IN1L_HPF_MASK 0x8000 #define MADERA_IN1L_HPF_SHIFT 15 -#define MADERA_IN1L_HPF_WIDTH 1 #define MADERA_IN1_DMIC_SUP_MASK 0x1800 #define MADERA_IN1_DMIC_SUP_SHIFT 11 -#define MADERA_IN1_DMIC_SUP_WIDTH 2 #define MADERA_IN1_MODE_MASK 0x0400 #define MADERA_IN1_MODE_SHIFT 10 -#define MADERA_IN1_MODE_WIDTH 1 #define MADERA_IN1L_PGA_VOL_MASK 0x00FE #define MADERA_IN1L_PGA_VOL_SHIFT 1 -#define MADERA_IN1L_PGA_VOL_WIDTH 7 /* (0x0311) ADC_Digital_Volume_1L */ #define MADERA_IN1L_SRC_MASK 0x4000 #define MADERA_IN1L_SRC_SHIFT 14 -#define MADERA_IN1L_SRC_WIDTH 1 #define MADERA_IN1L_SRC_SE_MASK 0x2000 #define MADERA_IN1L_SRC_SE_SHIFT 13 -#define MADERA_IN1L_SRC_SE_WIDTH 1 #define MADERA_IN1L_LP_MODE 0x0800 #define MADERA_IN1L_LP_MODE_MASK 0x0800 #define MADERA_IN1L_LP_MODE_SHIFT 11 -#define MADERA_IN1L_LP_MODE_WIDTH 1 #define MADERA_IN_VU 0x0200 #define MADERA_IN_VU_MASK 0x0200 #define MADERA_IN_VU_SHIFT 9 -#define MADERA_IN_VU_WIDTH 1 #define MADERA_IN1L_MUTE 0x0100 #define MADERA_IN1L_MUTE_MASK 0x0100 #define MADERA_IN1L_MUTE_SHIFT 8 -#define MADERA_IN1L_MUTE_WIDTH 1 #define MADERA_IN1L_DIG_VOL_MASK 0x00FF #define MADERA_IN1L_DIG_VOL_SHIFT 0 -#define MADERA_IN1L_DIG_VOL_WIDTH 8 /* (0x0312) DMIC1L_Control */ #define MADERA_IN1_OSR_MASK 0x0700 #define MADERA_IN1_OSR_SHIFT 8 -#define MADERA_IN1_OSR_WIDTH 3 /* (0x0313) IN1L_Rate_Control */ #define MADERA_IN1L_RATE_MASK 0xF800 #define MADERA_IN1L_RATE_SHIFT 11 -#define MADERA_IN1L_RATE_WIDTH 5 /* (0x0314) IN1R_Control */ #define MADERA_IN1R_HPF_MASK 0x8000 #define MADERA_IN1R_HPF_SHIFT 15 -#define MADERA_IN1R_HPF_WIDTH 1 #define MADERA_IN1R_PGA_VOL_MASK 0x00FE #define MADERA_IN1R_PGA_VOL_SHIFT 1 -#define MADERA_IN1R_PGA_VOL_WIDTH 7 #define MADERA_IN1_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN1_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN1_DMICCLK_SRC_WIDTH 2 /* (0x0315) ADC_Digital_Volume_1R */ #define MADERA_IN1R_SRC_MASK 0x4000 #define MADERA_IN1R_SRC_SHIFT 14 -#define MADERA_IN1R_SRC_WIDTH 1 #define MADERA_IN1R_SRC_SE_MASK 0x2000 #define MADERA_IN1R_SRC_SE_SHIFT 13 -#define MADERA_IN1R_SRC_SE_WIDTH 1 #define MADERA_IN1R_LP_MODE 0x0800 #define MADERA_IN1R_LP_MODE_MASK 0x0800 #define MADERA_IN1R_LP_MODE_SHIFT 11 -#define MADERA_IN1R_LP_MODE_WIDTH 1 #define MADERA_IN1R_MUTE 0x0100 #define MADERA_IN1R_MUTE_MASK 0x0100 #define MADERA_IN1R_MUTE_SHIFT 8 -#define MADERA_IN1R_MUTE_WIDTH 1 #define MADERA_IN1R_DIG_VOL_MASK 0x00FF #define MADERA_IN1R_DIG_VOL_SHIFT 0 -#define MADERA_IN1R_DIG_VOL_WIDTH 8 /* (0x0317) IN1R_Rate_Control */ #define MADERA_IN1R_RATE_MASK 0xF800 #define MADERA_IN1R_RATE_SHIFT 11 -#define MADERA_IN1R_RATE_WIDTH 5 /* (0x0318) IN2L_Control */ #define MADERA_IN2L_HPF_MASK 0x8000 #define MADERA_IN2L_HPF_SHIFT 15 -#define MADERA_IN2L_HPF_WIDTH 1 #define MADERA_IN2_DMIC_SUP_MASK 0x1800 #define MADERA_IN2_DMIC_SUP_SHIFT 11 -#define MADERA_IN2_DMIC_SUP_WIDTH 2 #define MADERA_IN2_MODE_MASK 0x0400 #define MADERA_IN2_MODE_SHIFT 10 -#define MADERA_IN2_MODE_WIDTH 1 #define MADERA_IN2L_PGA_VOL_MASK 0x00FE #define MADERA_IN2L_PGA_VOL_SHIFT 1 -#define MADERA_IN2L_PGA_VOL_WIDTH 7 /* (0x0319) ADC_Digital_Volume_2L */ #define MADERA_IN2L_SRC_MASK 0x4000 #define MADERA_IN2L_SRC_SHIFT 14 -#define MADERA_IN2L_SRC_WIDTH 1 #define MADERA_IN2L_SRC_SE_MASK 0x2000 #define MADERA_IN2L_SRC_SE_SHIFT 13 -#define MADERA_IN2L_SRC_SE_WIDTH 1 #define MADERA_IN2L_LP_MODE 0x0800 #define MADERA_IN2L_LP_MODE_MASK 0x0800 #define MADERA_IN2L_LP_MODE_SHIFT 11 -#define MADERA_IN2L_LP_MODE_WIDTH 1 #define MADERA_IN2L_MUTE 0x0100 #define MADERA_IN2L_MUTE_MASK 0x0100 #define MADERA_IN2L_MUTE_SHIFT 8 -#define MADERA_IN2L_MUTE_WIDTH 1 #define MADERA_IN2L_DIG_VOL_MASK 0x00FF #define MADERA_IN2L_DIG_VOL_SHIFT 0 -#define MADERA_IN2L_DIG_VOL_WIDTH 8 /* (0x031A) DMIC2L_Control */ #define MADERA_IN2_OSR_MASK 0x0700 #define MADERA_IN2_OSR_SHIFT 8 -#define MADERA_IN2_OSR_WIDTH 3 /* (0x031C) IN2R_Control */ #define MADERA_IN2R_HPF_MASK 0x8000 #define MADERA_IN2R_HPF_SHIFT 15 -#define MADERA_IN2R_HPF_WIDTH 1 #define MADERA_IN2R_PGA_VOL_MASK 0x00FE #define MADERA_IN2R_PGA_VOL_SHIFT 1 -#define MADERA_IN2R_PGA_VOL_WIDTH 7 #define MADERA_IN2_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN2_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN2_DMICCLK_SRC_WIDTH 2 /* (0x031D) ADC_Digital_Volume_2R */ #define MADERA_IN2R_SRC_MASK 0x4000 #define MADERA_IN2R_SRC_SHIFT 14 -#define MADERA_IN2R_SRC_WIDTH 1 #define MADERA_IN2R_SRC_SE_MASK 0x2000 #define MADERA_IN2R_SRC_SE_SHIFT 13 -#define MADERA_IN2R_SRC_SE_WIDTH 1 #define MADERA_IN2R_LP_MODE 0x0800 #define MADERA_IN2R_LP_MODE_MASK 0x0800 #define MADERA_IN2R_LP_MODE_SHIFT 11 -#define MADERA_IN2R_LP_MODE_WIDTH 1 #define MADERA_IN2R_MUTE 0x0100 #define MADERA_IN2R_MUTE_MASK 0x0100 #define MADERA_IN2R_MUTE_SHIFT 8 -#define MADERA_IN2R_MUTE_WIDTH 1 #define MADERA_IN2R_DIG_VOL_MASK 0x00FF #define MADERA_IN2R_DIG_VOL_SHIFT 0 -#define MADERA_IN2R_DIG_VOL_WIDTH 8 /* (0x0320) IN3L_Control */ #define MADERA_IN3L_HPF_MASK 0x8000 #define MADERA_IN3L_HPF_SHIFT 15 -#define MADERA_IN3L_HPF_WIDTH 1 #define MADERA_IN3_DMIC_SUP_MASK 0x1800 #define MADERA_IN3_DMIC_SUP_SHIFT 11 -#define MADERA_IN3_DMIC_SUP_WIDTH 2 #define MADERA_IN3_MODE_MASK 0x0400 #define MADERA_IN3_MODE_SHIFT 10 -#define MADERA_IN3_MODE_WIDTH 1 #define MADERA_IN3L_PGA_VOL_MASK 0x00FE #define MADERA_IN3L_PGA_VOL_SHIFT 1 -#define MADERA_IN3L_PGA_VOL_WIDTH 7 /* (0x0321) ADC_Digital_Volume_3L */ #define MADERA_IN3L_MUTE 0x0100 #define MADERA_IN3L_MUTE_MASK 0x0100 #define MADERA_IN3L_MUTE_SHIFT 8 -#define MADERA_IN3L_MUTE_WIDTH 1 #define MADERA_IN3L_DIG_VOL_MASK 0x00FF #define MADERA_IN3L_DIG_VOL_SHIFT 0 -#define MADERA_IN3L_DIG_VOL_WIDTH 8 /* (0x0322) DMIC3L_Control */ #define MADERA_IN3_OSR_MASK 0x0700 #define MADERA_IN3_OSR_SHIFT 8 -#define MADERA_IN3_OSR_WIDTH 3 /* (0x0324) IN3R_Control */ #define MADERA_IN3R_HPF_MASK 0x8000 #define MADERA_IN3R_HPF_SHIFT 15 -#define MADERA_IN3R_HPF_WIDTH 1 #define MADERA_IN3R_PGA_VOL_MASK 0x00FE #define MADERA_IN3R_PGA_VOL_SHIFT 1 -#define MADERA_IN3R_PGA_VOL_WIDTH 7 #define MADERA_IN3_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN3_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN3_DMICCLK_SRC_WIDTH 2 /* (0x0325) ADC_Digital_Volume_3R */ #define MADERA_IN3R_MUTE 0x0100 #define MADERA_IN3R_MUTE_MASK 0x0100 #define MADERA_IN3R_MUTE_SHIFT 8 -#define MADERA_IN3R_MUTE_WIDTH 1 #define MADERA_IN3R_DIG_VOL_MASK 0x00FF #define MADERA_IN3R_DIG_VOL_SHIFT 0 -#define MADERA_IN3R_DIG_VOL_WIDTH 8 /* (0x0328) IN4L_Control */ #define MADERA_IN4L_HPF_MASK 0x8000 #define MADERA_IN4L_HPF_SHIFT 15 -#define MADERA_IN4L_HPF_WIDTH 1 #define MADERA_IN4_DMIC_SUP_MASK 0x1800 #define MADERA_IN4_DMIC_SUP_SHIFT 11 -#define MADERA_IN4_DMIC_SUP_WIDTH 2 /* (0x0329) ADC_Digital_Volume_4L */ #define MADERA_IN4L_MUTE 0x0100 #define MADERA_IN4L_MUTE_MASK 0x0100 #define MADERA_IN4L_MUTE_SHIFT 8 -#define MADERA_IN4L_MUTE_WIDTH 1 #define MADERA_IN4L_DIG_VOL_MASK 0x00FF #define MADERA_IN4L_DIG_VOL_SHIFT 0 -#define MADERA_IN4L_DIG_VOL_WIDTH 8 /* (0x032A) DMIC4L_Control */ #define MADERA_IN4_OSR_MASK 0x0700 #define MADERA_IN4_OSR_SHIFT 8 -#define MADERA_IN4_OSR_WIDTH 3 /* (0x032C) IN4R_Control */ #define MADERA_IN4R_HPF_MASK 0x8000 #define MADERA_IN4R_HPF_SHIFT 15 -#define MADERA_IN4R_HPF_WIDTH 1 #define MADERA_IN4_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN4_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN4_DMICCLK_SRC_WIDTH 2 /* (0x032D) ADC_Digital_Volume_4R */ #define MADERA_IN4R_MUTE 0x0100 #define MADERA_IN4R_MUTE_MASK 0x0100 #define MADERA_IN4R_MUTE_SHIFT 8 -#define MADERA_IN4R_MUTE_WIDTH 1 #define MADERA_IN4R_DIG_VOL_MASK 0x00FF #define MADERA_IN4R_DIG_VOL_SHIFT 0 -#define MADERA_IN4R_DIG_VOL_WIDTH 8 /* (0x0330) IN5L_Control */ #define MADERA_IN5L_HPF_MASK 0x8000 #define MADERA_IN5L_HPF_SHIFT 15 -#define MADERA_IN5L_HPF_WIDTH 1 #define MADERA_IN5_DMIC_SUP_MASK 0x1800 #define MADERA_IN5_DMIC_SUP_SHIFT 11 -#define MADERA_IN5_DMIC_SUP_WIDTH 2 /* (0x0331) ADC_Digital_Volume_5L */ #define MADERA_IN5L_MUTE 0x0100 #define MADERA_IN5L_MUTE_MASK 0x0100 #define MADERA_IN5L_MUTE_SHIFT 8 -#define MADERA_IN5L_MUTE_WIDTH 1 #define MADERA_IN5L_DIG_VOL_MASK 0x00FF #define MADERA_IN5L_DIG_VOL_SHIFT 0 -#define MADERA_IN5L_DIG_VOL_WIDTH 8 /* (0x0332) DMIC5L_Control */ #define MADERA_IN5_OSR_MASK 0x0700 #define MADERA_IN5_OSR_SHIFT 8 -#define MADERA_IN5_OSR_WIDTH 3 /* (0x0334) IN5R_Control */ #define MADERA_IN5R_HPF_MASK 0x8000 #define MADERA_IN5R_HPF_SHIFT 15 -#define MADERA_IN5R_HPF_WIDTH 1 #define MADERA_IN5_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN5_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN5_DMICCLK_SRC_WIDTH 2 /* (0x0335) ADC_Digital_Volume_5R */ #define MADERA_IN5R_MUTE 0x0100 #define MADERA_IN5R_MUTE_MASK 0x0100 #define MADERA_IN5R_MUTE_SHIFT 8 -#define MADERA_IN5R_MUTE_WIDTH 1 #define MADERA_IN5R_DIG_VOL_MASK 0x00FF #define MADERA_IN5R_DIG_VOL_SHIFT 0 -#define MADERA_IN5R_DIG_VOL_WIDTH 8 /* (0x0338) IN6L_Control */ #define MADERA_IN6L_HPF_MASK 0x8000 #define MADERA_IN6L_HPF_SHIFT 15 -#define MADERA_IN6L_HPF_WIDTH 1 #define MADERA_IN6_DMIC_SUP_MASK 0x1800 #define MADERA_IN6_DMIC_SUP_SHIFT 11 -#define MADERA_IN6_DMIC_SUP_WIDTH 2 /* (0x0339) ADC_Digital_Volume_6L */ #define MADERA_IN6L_MUTE 0x0100 #define MADERA_IN6L_MUTE_MASK 0x0100 #define MADERA_IN6L_MUTE_SHIFT 8 -#define MADERA_IN6L_MUTE_WIDTH 1 #define MADERA_IN6L_DIG_VOL_MASK 0x00FF #define MADERA_IN6L_DIG_VOL_SHIFT 0 -#define MADERA_IN6L_DIG_VOL_WIDTH 8 /* (0x033A) DMIC6L_Control */ #define MADERA_IN6_OSR_MASK 0x0700 #define MADERA_IN6_OSR_SHIFT 8 -#define MADERA_IN6_OSR_WIDTH 3 /* (0x033C) IN6R_Control */ #define MADERA_IN6R_HPF_MASK 0x8000 #define MADERA_IN6R_HPF_SHIFT 15 -#define MADERA_IN6R_HPF_WIDTH 1 /* (0x033D) ADC_Digital_Volume_6R */ #define MADERA_IN6R_MUTE 0x0100 #define MADERA_IN6R_MUTE_MASK 0x0100 #define MADERA_IN6R_MUTE_SHIFT 8 -#define MADERA_IN6R_MUTE_WIDTH 1 #define MADERA_IN6R_DIG_VOL_MASK 0x00FF #define MADERA_IN6R_DIG_VOL_SHIFT 0 -#define MADERA_IN6R_DIG_VOL_WIDTH 8 /* (0x033E) DMIC6R_Control */ #define MADERA_IN6_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN6_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN6_DMICCLK_SRC_WIDTH 2 /* (0x0400) Output_Enables_1 */ #define MADERA_EP_SEL 0x8000 #define MADERA_EP_SEL_MASK 0x8000 #define MADERA_EP_SEL_SHIFT 15 -#define MADERA_EP_SEL_WIDTH 1 #define MADERA_OUT6L_ENA 0x0800 #define MADERA_OUT6L_ENA_MASK 0x0800 #define MADERA_OUT6L_ENA_SHIFT 11 -#define MADERA_OUT6L_ENA_WIDTH 1 #define MADERA_OUT6R_ENA 0x0400 #define MADERA_OUT6R_ENA_MASK 0x0400 #define MADERA_OUT6R_ENA_SHIFT 10 -#define MADERA_OUT6R_ENA_WIDTH 1 #define MADERA_OUT5L_ENA 0x0200 #define MADERA_OUT5L_ENA_MASK 0x0200 #define MADERA_OUT5L_ENA_SHIFT 9 -#define MADERA_OUT5L_ENA_WIDTH 1 #define MADERA_OUT5R_ENA 0x0100 #define MADERA_OUT5R_ENA_MASK 0x0100 #define MADERA_OUT5R_ENA_SHIFT 8 -#define MADERA_OUT5R_ENA_WIDTH 1 #define MADERA_OUT4L_ENA 0x0080 #define MADERA_OUT4L_ENA_MASK 0x0080 #define MADERA_OUT4L_ENA_SHIFT 7 -#define MADERA_OUT4L_ENA_WIDTH 1 #define MADERA_OUT4R_ENA 0x0040 #define MADERA_OUT4R_ENA_MASK 0x0040 #define MADERA_OUT4R_ENA_SHIFT 6 -#define MADERA_OUT4R_ENA_WIDTH 1 #define MADERA_OUT3L_ENA 0x0020 #define MADERA_OUT3L_ENA_MASK 0x0020 #define MADERA_OUT3L_ENA_SHIFT 5 -#define MADERA_OUT3L_ENA_WIDTH 1 #define MADERA_OUT3R_ENA 0x0010 #define MADERA_OUT3R_ENA_MASK 0x0010 #define MADERA_OUT3R_ENA_SHIFT 4 -#define MADERA_OUT3R_ENA_WIDTH 1 #define MADERA_OUT2L_ENA 0x0008 #define MADERA_OUT2L_ENA_MASK 0x0008 #define MADERA_OUT2L_ENA_SHIFT 3 -#define MADERA_OUT2L_ENA_WIDTH 1 #define MADERA_OUT2R_ENA 0x0004 #define MADERA_OUT2R_ENA_MASK 0x0004 #define MADERA_OUT2R_ENA_SHIFT 2 -#define MADERA_OUT2R_ENA_WIDTH 1 #define MADERA_OUT1L_ENA 0x0002 #define MADERA_OUT1L_ENA_MASK 0x0002 #define MADERA_OUT1L_ENA_SHIFT 1 -#define MADERA_OUT1L_ENA_WIDTH 1 #define MADERA_OUT1R_ENA 0x0001 #define MADERA_OUT1R_ENA_MASK 0x0001 #define MADERA_OUT1R_ENA_SHIFT 0 -#define MADERA_OUT1R_ENA_WIDTH 1 /* (0x0408) Output_Rate_1 */ #define MADERA_CP_DAC_MODE_MASK 0x0040 #define MADERA_CP_DAC_MODE_SHIFT 6 -#define MADERA_CP_DAC_MODE_WIDTH 1 #define MADERA_OUT_EXT_CLK_DIV_MASK 0x0030 #define MADERA_OUT_EXT_CLK_DIV_SHIFT 4 -#define MADERA_OUT_EXT_CLK_DIV_WIDTH 2 #define MADERA_OUT_CLK_SRC_MASK 0x0007 #define MADERA_OUT_CLK_SRC_SHIFT 0 -#define MADERA_OUT_CLK_SRC_WIDTH 3 /* (0x0409) Output_Volume_Ramp */ #define MADERA_OUT_VD_RAMP_MASK 0x0070 #define MADERA_OUT_VD_RAMP_SHIFT 4 -#define MADERA_OUT_VD_RAMP_WIDTH 3 #define MADERA_OUT_VI_RAMP_MASK 0x0007 #define MADERA_OUT_VI_RAMP_SHIFT 0 -#define MADERA_OUT_VI_RAMP_WIDTH 3 /* (0x0410) Output_Path_Config_1L */ #define MADERA_OUT1_MONO 0x1000 #define MADERA_OUT1_MONO_MASK 0x1000 #define MADERA_OUT1_MONO_SHIFT 12 -#define MADERA_OUT1_MONO_WIDTH 1 #define MADERA_OUT1L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT1L_ANC_SRC_SHIFT 10 -#define MADERA_OUT1L_ANC_SRC_WIDTH 2 /* (0x0411) DAC_Digital_Volume_1L */ #define MADERA_OUT1L_VU 0x0200 #define MADERA_OUT1L_VU_MASK 0x0200 #define MADERA_OUT1L_VU_SHIFT 9 -#define MADERA_OUT1L_VU_WIDTH 1 #define MADERA_OUT1L_MUTE 0x0100 #define MADERA_OUT1L_MUTE_MASK 0x0100 #define MADERA_OUT1L_MUTE_SHIFT 8 -#define MADERA_OUT1L_MUTE_WIDTH 1 #define MADERA_OUT1L_VOL_MASK 0x00FF #define MADERA_OUT1L_VOL_SHIFT 0 -#define MADERA_OUT1L_VOL_WIDTH 8 /* (0x0412) Output_Path_Config_1 */ #define MADERA_HP1_GND_SEL_MASK 0x0007 #define MADERA_HP1_GND_SEL_SHIFT 0 -#define MADERA_HP1_GND_SEL_WIDTH 3 /* (0x0414) Output_Path_Config_1R */ #define MADERA_OUT1R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT1R_ANC_SRC_SHIFT 10 -#define MADERA_OUT1R_ANC_SRC_WIDTH 2 /* (0x0415) DAC_Digital_Volume_1R */ #define MADERA_OUT1R_MUTE 0x0100 #define MADERA_OUT1R_MUTE_MASK 0x0100 #define MADERA_OUT1R_MUTE_SHIFT 8 -#define MADERA_OUT1R_MUTE_WIDTH 1 #define MADERA_OUT1R_VOL_MASK 0x00FF #define MADERA_OUT1R_VOL_SHIFT 0 -#define MADERA_OUT1R_VOL_WIDTH 8 /* (0x0418) Output_Path_Config_2L */ #define MADERA_OUT2L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT2L_ANC_SRC_SHIFT 10 -#define MADERA_OUT2L_ANC_SRC_WIDTH 2 /* (0x0419) DAC_Digital_Volume_2L */ #define MADERA_OUT2L_MUTE 0x0100 #define MADERA_OUT2L_MUTE_MASK 0x0100 #define MADERA_OUT2L_MUTE_SHIFT 8 -#define MADERA_OUT2L_MUTE_WIDTH 1 #define MADERA_OUT2L_VOL_MASK 0x00FF #define MADERA_OUT2L_VOL_SHIFT 0 -#define MADERA_OUT2L_VOL_WIDTH 8 /* (0x041A) Output_Path_Config_2 */ #define MADERA_HP2_GND_SEL_MASK 0x0007 #define MADERA_HP2_GND_SEL_SHIFT 0 -#define MADERA_HP2_GND_SEL_WIDTH 3 /* (0x041C) Output_Path_Config_2R */ #define MADERA_OUT2R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT2R_ANC_SRC_SHIFT 10 -#define MADERA_OUT2R_ANC_SRC_WIDTH 2 /* (0x041D) DAC_Digital_Volume_2R */ #define MADERA_OUT2R_MUTE 0x0100 #define MADERA_OUT2R_MUTE_MASK 0x0100 #define MADERA_OUT2R_MUTE_SHIFT 8 -#define MADERA_OUT2R_MUTE_WIDTH 1 #define MADERA_OUT2R_VOL_MASK 0x00FF #define MADERA_OUT2R_VOL_SHIFT 0 -#define MADERA_OUT2R_VOL_WIDTH 8 /* (0x0420) Output_Path_Config_3L */ #define MADERA_OUT3L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT3L_ANC_SRC_SHIFT 10 -#define MADERA_OUT3L_ANC_SRC_WIDTH 2 /* (0x0421) DAC_Digital_Volume_3L */ #define MADERA_OUT3L_MUTE 0x0100 #define MADERA_OUT3L_MUTE_MASK 0x0100 #define MADERA_OUT3L_MUTE_SHIFT 8 -#define MADERA_OUT3L_MUTE_WIDTH 1 #define MADERA_OUT3L_VOL_MASK 0x00FF #define MADERA_OUT3L_VOL_SHIFT 0 -#define MADERA_OUT3L_VOL_WIDTH 8 /* (0x0424) Output_Path_Config_3R */ #define MADERA_OUT3R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT3R_ANC_SRC_SHIFT 10 -#define MADERA_OUT3R_ANC_SRC_WIDTH 2 /* (0x0425) DAC_Digital_Volume_3R */ #define MADERA_OUT3R_MUTE 0x0100 #define MADERA_OUT3R_MUTE_MASK 0x0100 #define MADERA_OUT3R_MUTE_SHIFT 8 -#define MADERA_OUT3R_MUTE_WIDTH 1 #define MADERA_OUT3R_VOL_MASK 0x00FF #define MADERA_OUT3R_VOL_SHIFT 0 -#define MADERA_OUT3R_VOL_WIDTH 8 /* (0x0428) Output_Path_Config_4L */ #define MADERA_OUT4L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT4L_ANC_SRC_SHIFT 10 -#define MADERA_OUT4L_ANC_SRC_WIDTH 2 /* (0x0429) DAC_Digital_Volume_4L */ #define MADERA_OUT4L_MUTE 0x0100 #define MADERA_OUT4L_MUTE_MASK 0x0100 #define MADERA_OUT4L_MUTE_SHIFT 8 -#define MADERA_OUT4L_MUTE_WIDTH 1 #define MADERA_OUT4L_VOL_MASK 0x00FF #define MADERA_OUT4L_VOL_SHIFT 0 -#define MADERA_OUT4L_VOL_WIDTH 8 /* (0x042C) Output_Path_Config_4R */ #define MADERA_OUT4R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT4R_ANC_SRC_SHIFT 10 -#define MADERA_OUT4R_ANC_SRC_WIDTH 2 /* (0x042D) DAC_Digital_Volume_4R */ #define MADERA_OUT4R_MUTE 0x0100 #define MADERA_OUT4R_MUTE_MASK 0x0100 #define MADERA_OUT4R_MUTE_SHIFT 8 -#define MADERA_OUT4R_MUTE_WIDTH 1 #define MADERA_OUT4R_VOL_MASK 0x00FF #define MADERA_OUT4R_VOL_SHIFT 0 -#define MADERA_OUT4R_VOL_WIDTH 8 /* (0x0430) Output_Path_Config_5L */ #define MADERA_OUT5_OSR 0x2000 #define MADERA_OUT5_OSR_MASK 0x2000 #define MADERA_OUT5_OSR_SHIFT 13 -#define MADERA_OUT5_OSR_WIDTH 1 #define MADERA_OUT5L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT5L_ANC_SRC_SHIFT 10 -#define MADERA_OUT5L_ANC_SRC_WIDTH 2 /* (0x0431) DAC_Digital_Volume_5L */ #define MADERA_OUT5L_MUTE 0x0100 #define MADERA_OUT5L_MUTE_MASK 0x0100 #define MADERA_OUT5L_MUTE_SHIFT 8 -#define MADERA_OUT5L_MUTE_WIDTH 1 #define MADERA_OUT5L_VOL_MASK 0x00FF #define MADERA_OUT5L_VOL_SHIFT 0 -#define MADERA_OUT5L_VOL_WIDTH 8 /* (0x0434) Output_Path_Config_5R */ #define MADERA_OUT5R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT5R_ANC_SRC_SHIFT 10 -#define MADERA_OUT5R_ANC_SRC_WIDTH 2 /* (0x0435) DAC_Digital_Volume_5R */ #define MADERA_OUT5R_MUTE 0x0100 #define MADERA_OUT5R_MUTE_MASK 0x0100 #define MADERA_OUT5R_MUTE_SHIFT 8 -#define MADERA_OUT5R_MUTE_WIDTH 1 #define MADERA_OUT5R_VOL_MASK 0x00FF #define MADERA_OUT5R_VOL_SHIFT 0 -#define MADERA_OUT5R_VOL_WIDTH 8 /* (0x0438) Output_Path_Config_6L */ #define MADERA_OUT6_OSR 0x2000 #define MADERA_OUT6_OSR_MASK 0x2000 #define MADERA_OUT6_OSR_SHIFT 13 -#define MADERA_OUT6_OSR_WIDTH 1 #define MADERA_OUT6L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT6L_ANC_SRC_SHIFT 10 -#define MADERA_OUT6L_ANC_SRC_WIDTH 2 /* (0x0439) DAC_Digital_Volume_6L */ #define MADERA_OUT6L_MUTE 0x0100 #define MADERA_OUT6L_MUTE_MASK 0x0100 #define MADERA_OUT6L_MUTE_SHIFT 8 -#define MADERA_OUT6L_MUTE_WIDTH 1 #define MADERA_OUT6L_VOL_MASK 0x00FF #define MADERA_OUT6L_VOL_SHIFT 0 -#define MADERA_OUT6L_VOL_WIDTH 8 /* (0x043C) Output_Path_Config_6R */ #define MADERA_OUT6R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT6R_ANC_SRC_SHIFT 10 -#define MADERA_OUT6R_ANC_SRC_WIDTH 2 /* (0x043D) DAC_Digital_Volume_6R */ #define MADERA_OUT6R_MUTE 0x0100 #define MADERA_OUT6R_MUTE_MASK 0x0100 #define MADERA_OUT6R_MUTE_SHIFT 8 -#define MADERA_OUT6R_MUTE_WIDTH 1 #define MADERA_OUT6R_VOL_MASK 0x00FF #define MADERA_OUT6R_VOL_SHIFT 0 -#define MADERA_OUT6R_VOL_WIDTH 8 /* (0x0450) - DAC AEC Control 1 */ #define MADERA_AEC1_LOOPBACK_SRC_MASK 0x003C #define MADERA_AEC1_LOOPBACK_SRC_SHIFT 2 -#define MADERA_AEC1_LOOPBACK_SRC_WIDTH 4 #define MADERA_AEC1_ENA_STS 0x0002 #define MADERA_AEC1_ENA_STS_MASK 0x0002 #define MADERA_AEC1_ENA_STS_SHIFT 1 -#define MADERA_AEC1_ENA_STS_WIDTH 1 #define MADERA_AEC1_LOOPBACK_ENA 0x0001 #define MADERA_AEC1_LOOPBACK_ENA_MASK 0x0001 #define MADERA_AEC1_LOOPBACK_ENA_SHIFT 0 -#define MADERA_AEC1_LOOPBACK_ENA_WIDTH 1 /* (0x0451) DAC_AEC_Control_2 */ #define MADERA_AEC2_LOOPBACK_SRC_MASK 0x003C #define MADERA_AEC2_LOOPBACK_SRC_SHIFT 2 -#define MADERA_AEC2_LOOPBACK_SRC_WIDTH 4 #define MADERA_AEC2_ENA_STS 0x0002 #define MADERA_AEC2_ENA_STS_MASK 0x0002 #define MADERA_AEC2_ENA_STS_SHIFT 1 -#define MADERA_AEC2_ENA_STS_WIDTH 1 #define MADERA_AEC2_LOOPBACK_ENA 0x0001 #define MADERA_AEC2_LOOPBACK_ENA_MASK 0x0001 #define MADERA_AEC2_LOOPBACK_ENA_SHIFT 0 -#define MADERA_AEC2_LOOPBACK_ENA_WIDTH 1 /* (0x0458) Noise_Gate_Control */ #define MADERA_NGATE_HOLD_MASK 0x0030 #define MADERA_NGATE_HOLD_SHIFT 4 -#define MADERA_NGATE_HOLD_WIDTH 2 #define MADERA_NGATE_THR_MASK 0x000E #define MADERA_NGATE_THR_SHIFT 1 -#define MADERA_NGATE_THR_WIDTH 3 #define MADERA_NGATE_ENA 0x0001 #define MADERA_NGATE_ENA_MASK 0x0001 #define MADERA_NGATE_ENA_SHIFT 0 -#define MADERA_NGATE_ENA_WIDTH 1 /* (0x0490) PDM_SPK1_CTRL_1 */ #define MADERA_SPK1R_MUTE 0x2000 #define MADERA_SPK1R_MUTE_MASK 0x2000 #define MADERA_SPK1R_MUTE_SHIFT 13 -#define MADERA_SPK1R_MUTE_WIDTH 1 #define MADERA_SPK1L_MUTE 0x1000 #define MADERA_SPK1L_MUTE_MASK 0x1000 #define MADERA_SPK1L_MUTE_SHIFT 12 -#define MADERA_SPK1L_MUTE_WIDTH 1 #define MADERA_SPK1_MUTE_ENDIAN 0x0100 #define MADERA_SPK1_MUTE_ENDIAN_MASK 0x0100 #define MADERA_SPK1_MUTE_ENDIAN_SHIFT 8 -#define MADERA_SPK1_MUTE_ENDIAN_WIDTH 1 #define MADERA_SPK1_MUTE_SEQ1_MASK 0x00FF #define MADERA_SPK1_MUTE_SEQ1_SHIFT 0 -#define MADERA_SPK1_MUTE_SEQ1_WIDTH 8 /* (0x0491) PDM_SPK1_CTRL_2 */ #define MADERA_SPK1_FMT 0x0001 #define MADERA_SPK1_FMT_MASK 0x0001 #define MADERA_SPK1_FMT_SHIFT 0 -#define MADERA_SPK1_FMT_WIDTH 1 /* (0x0492) PDM_SPK2_CTRL_1 */ #define MADERA_SPK2R_MUTE 0x2000 #define MADERA_SPK2R_MUTE_MASK 0x2000 #define MADERA_SPK2R_MUTE_SHIFT 13 -#define MADERA_SPK2R_MUTE_WIDTH 1 #define MADERA_SPK2L_MUTE 0x1000 #define MADERA_SPK2L_MUTE_MASK 0x1000 #define MADERA_SPK2L_MUTE_SHIFT 12 -#define MADERA_SPK2L_MUTE_WIDTH 1 /* (0x04A0) - HP1 Short Circuit Ctrl */ #define MADERA_HP1_SC_ENA 0x1000 #define MADERA_HP1_SC_ENA_MASK 0x1000 #define MADERA_HP1_SC_ENA_SHIFT 12 -#define MADERA_HP1_SC_ENA_WIDTH 1 /* (0x04A1) - HP2 Short Circuit Ctrl */ #define MADERA_HP2_SC_ENA 0x1000 #define MADERA_HP2_SC_ENA_MASK 0x1000 #define MADERA_HP2_SC_ENA_SHIFT 12 -#define MADERA_HP2_SC_ENA_WIDTH 1 /* (0x04A2) - HP3 Short Circuit Ctrl */ #define MADERA_HP3_SC_ENA 0x1000 #define MADERA_HP3_SC_ENA_MASK 0x1000 #define MADERA_HP3_SC_ENA_SHIFT 12 -#define MADERA_HP3_SC_ENA_WIDTH 1 /* (0x04A8) - HP_Test_Ctrl_5 */ #define MADERA_HP1L_ONEFLT 0x0100 #define MADERA_HP1L_ONEFLT_MASK 0x0100 #define MADERA_HP1L_ONEFLT_SHIFT 8 -#define MADERA_HP1L_ONEFLT_WIDTH 1 /* (0x04A9) - HP_Test_Ctrl_6 */ #define MADERA_HP1R_ONEFLT 0x0100 #define MADERA_HP1R_ONEFLT_MASK 0x0100 #define MADERA_HP1R_ONEFLT_SHIFT 8 -#define MADERA_HP1R_ONEFLT_WIDTH 1 /* (0x0500) AIF1_BCLK_Ctrl */ #define MADERA_AIF1_BCLK_INV 0x0080 #define MADERA_AIF1_BCLK_INV_MASK 0x0080 #define MADERA_AIF1_BCLK_INV_SHIFT 7 -#define MADERA_AIF1_BCLK_INV_WIDTH 1 #define MADERA_AIF1_BCLK_MSTR 0x0020 #define MADERA_AIF1_BCLK_MSTR_MASK 0x0020 #define MADERA_AIF1_BCLK_MSTR_SHIFT 5 -#define MADERA_AIF1_BCLK_MSTR_WIDTH 1 #define MADERA_AIF1_BCLK_FREQ_MASK 0x001F #define MADERA_AIF1_BCLK_FREQ_SHIFT 0 -#define MADERA_AIF1_BCLK_FREQ_WIDTH 5 /* (0x0501) AIF1_Tx_Pin_Ctrl */ #define MADERA_AIF1TX_LRCLK_SRC 0x0008 #define MADERA_AIF1TX_LRCLK_SRC_MASK 0x0008 #define MADERA_AIF1TX_LRCLK_SRC_SHIFT 3 -#define MADERA_AIF1TX_LRCLK_SRC_WIDTH 1 #define MADERA_AIF1TX_LRCLK_INV 0x0004 #define MADERA_AIF1TX_LRCLK_INV_MASK 0x0004 #define MADERA_AIF1TX_LRCLK_INV_SHIFT 2 -#define MADERA_AIF1TX_LRCLK_INV_WIDTH 1 #define MADERA_AIF1TX_LRCLK_MSTR 0x0001 #define MADERA_AIF1TX_LRCLK_MSTR_MASK 0x0001 #define MADERA_AIF1TX_LRCLK_MSTR_SHIFT 0 -#define MADERA_AIF1TX_LRCLK_MSTR_WIDTH 1 /* (0x0502) AIF1_Rx_Pin_Ctrl */ #define MADERA_AIF1RX_LRCLK_INV 0x0004 #define MADERA_AIF1RX_LRCLK_INV_MASK 0x0004 #define MADERA_AIF1RX_LRCLK_INV_SHIFT 2 -#define MADERA_AIF1RX_LRCLK_INV_WIDTH 1 #define MADERA_AIF1RX_LRCLK_FRC 0x0002 #define MADERA_AIF1RX_LRCLK_FRC_MASK 0x0002 #define MADERA_AIF1RX_LRCLK_FRC_SHIFT 1 -#define MADERA_AIF1RX_LRCLK_FRC_WIDTH 1 #define MADERA_AIF1RX_LRCLK_MSTR 0x0001 #define MADERA_AIF1RX_LRCLK_MSTR_MASK 0x0001 #define MADERA_AIF1RX_LRCLK_MSTR_SHIFT 0 -#define MADERA_AIF1RX_LRCLK_MSTR_WIDTH 1 /* (0x0503) AIF1_Rate_Ctrl */ #define MADERA_AIF1_RATE_MASK 0xF800 #define MADERA_AIF1_RATE_SHIFT 11 -#define MADERA_AIF1_RATE_WIDTH 5 #define MADERA_AIF1_TRI 0x0040 #define MADERA_AIF1_TRI_MASK 0x0040 #define MADERA_AIF1_TRI_SHIFT 6 -#define MADERA_AIF1_TRI_WIDTH 1 /* (0x0504) AIF1_Format */ #define MADERA_AIF1_FMT_MASK 0x0007 #define MADERA_AIF1_FMT_SHIFT 0 -#define MADERA_AIF1_FMT_WIDTH 3 /* (0x0506) AIF1_Rx_BCLK_Rate */ #define MADERA_AIF1RX_BCPF_MASK 0x1FFF #define MADERA_AIF1RX_BCPF_SHIFT 0 -#define MADERA_AIF1RX_BCPF_WIDTH 13 /* (0x0507) AIF1_Frame_Ctrl_1 */ #define MADERA_AIF1TX_WL_MASK 0x3F00 #define MADERA_AIF1TX_WL_SHIFT 8 -#define MADERA_AIF1TX_WL_WIDTH 6 #define MADERA_AIF1TX_SLOT_LEN_MASK 0x00FF #define MADERA_AIF1TX_SLOT_LEN_SHIFT 0 -#define MADERA_AIF1TX_SLOT_LEN_WIDTH 8 /* (0x0508) AIF1_Frame_Ctrl_2 */ #define MADERA_AIF1RX_WL_MASK 0x3F00 #define MADERA_AIF1RX_WL_SHIFT 8 -#define MADERA_AIF1RX_WL_WIDTH 6 #define MADERA_AIF1RX_SLOT_LEN_MASK 0x00FF #define MADERA_AIF1RX_SLOT_LEN_SHIFT 0 -#define MADERA_AIF1RX_SLOT_LEN_WIDTH 8 /* (0x0509) AIF1_Frame_Ctrl_3 */ #define MADERA_AIF1TX1_SLOT_MASK 0x003F #define MADERA_AIF1TX1_SLOT_SHIFT 0 -#define MADERA_AIF1TX1_SLOT_WIDTH 6 /* (0x0519) AIF1_Tx_Enables */ #define MADERA_AIF1TX8_ENA 0x0080 #define MADERA_AIF1TX8_ENA_MASK 0x0080 #define MADERA_AIF1TX8_ENA_SHIFT 7 -#define MADERA_AIF1TX8_ENA_WIDTH 1 #define MADERA_AIF1TX7_ENA 0x0040 #define MADERA_AIF1TX7_ENA_MASK 0x0040 #define MADERA_AIF1TX7_ENA_SHIFT 6 -#define MADERA_AIF1TX7_ENA_WIDTH 1 #define MADERA_AIF1TX6_ENA 0x0020 #define MADERA_AIF1TX6_ENA_MASK 0x0020 #define MADERA_AIF1TX6_ENA_SHIFT 5 -#define MADERA_AIF1TX6_ENA_WIDTH 1 #define MADERA_AIF1TX5_ENA 0x0010 #define MADERA_AIF1TX5_ENA_MASK 0x0010 #define MADERA_AIF1TX5_ENA_SHIFT 4 -#define MADERA_AIF1TX5_ENA_WIDTH 1 #define MADERA_AIF1TX4_ENA 0x0008 #define MADERA_AIF1TX4_ENA_MASK 0x0008 #define MADERA_AIF1TX4_ENA_SHIFT 3 -#define MADERA_AIF1TX4_ENA_WIDTH 1 #define MADERA_AIF1TX3_ENA 0x0004 #define MADERA_AIF1TX3_ENA_MASK 0x0004 #define MADERA_AIF1TX3_ENA_SHIFT 2 -#define MADERA_AIF1TX3_ENA_WIDTH 1 #define MADERA_AIF1TX2_ENA 0x0002 #define MADERA_AIF1TX2_ENA_MASK 0x0002 #define MADERA_AIF1TX2_ENA_SHIFT 1 -#define MADERA_AIF1TX2_ENA_WIDTH 1 #define MADERA_AIF1TX1_ENA 0x0001 #define MADERA_AIF1TX1_ENA_MASK 0x0001 #define MADERA_AIF1TX1_ENA_SHIFT 0 -#define MADERA_AIF1TX1_ENA_WIDTH 1 /* (0x051A) AIF1_Rx_Enables */ #define MADERA_AIF1RX8_ENA 0x0080 #define MADERA_AIF1RX8_ENA_MASK 0x0080 #define MADERA_AIF1RX8_ENA_SHIFT 7 -#define MADERA_AIF1RX8_ENA_WIDTH 1 #define MADERA_AIF1RX7_ENA 0x0040 #define MADERA_AIF1RX7_ENA_MASK 0x0040 #define MADERA_AIF1RX7_ENA_SHIFT 6 -#define MADERA_AIF1RX7_ENA_WIDTH 1 #define MADERA_AIF1RX6_ENA 0x0020 #define MADERA_AIF1RX6_ENA_MASK 0x0020 #define MADERA_AIF1RX6_ENA_SHIFT 5 -#define MADERA_AIF1RX6_ENA_WIDTH 1 #define MADERA_AIF1RX5_ENA 0x0010 #define MADERA_AIF1RX5_ENA_MASK 0x0010 #define MADERA_AIF1RX5_ENA_SHIFT 4 -#define MADERA_AIF1RX5_ENA_WIDTH 1 #define MADERA_AIF1RX4_ENA 0x0008 #define MADERA_AIF1RX4_ENA_MASK 0x0008 #define MADERA_AIF1RX4_ENA_SHIFT 3 -#define MADERA_AIF1RX4_ENA_WIDTH 1 #define MADERA_AIF1RX3_ENA 0x0004 #define MADERA_AIF1RX3_ENA_MASK 0x0004 #define MADERA_AIF1RX3_ENA_SHIFT 2 -#define MADERA_AIF1RX3_ENA_WIDTH 1 #define MADERA_AIF1RX2_ENA 0x0002 #define MADERA_AIF1RX2_ENA_MASK 0x0002 #define MADERA_AIF1RX2_ENA_SHIFT 1 -#define MADERA_AIF1RX2_ENA_WIDTH 1 #define MADERA_AIF1RX1_ENA 0x0001 #define MADERA_AIF1RX1_ENA_MASK 0x0001 #define MADERA_AIF1RX1_ENA_SHIFT 0 -#define MADERA_AIF1RX1_ENA_WIDTH 1 /* (0x0559) AIF2_Tx_Enables */ #define MADERA_AIF2TX8_ENA 0x0080 #define MADERA_AIF2TX8_ENA_MASK 0x0080 #define MADERA_AIF2TX8_ENA_SHIFT 7 -#define MADERA_AIF2TX8_ENA_WIDTH 1 #define MADERA_AIF2TX7_ENA 0x0040 #define MADERA_AIF2TX7_ENA_MASK 0x0040 #define MADERA_AIF2TX7_ENA_SHIFT 6 -#define MADERA_AIF2TX7_ENA_WIDTH 1 #define MADERA_AIF2TX6_ENA 0x0020 #define MADERA_AIF2TX6_ENA_MASK 0x0020 #define MADERA_AIF2TX6_ENA_SHIFT 5 -#define MADERA_AIF2TX6_ENA_WIDTH 1 #define MADERA_AIF2TX5_ENA 0x0010 #define MADERA_AIF2TX5_ENA_MASK 0x0010 #define MADERA_AIF2TX5_ENA_SHIFT 4 -#define MADERA_AIF2TX5_ENA_WIDTH 1 #define MADERA_AIF2TX4_ENA 0x0008 #define MADERA_AIF2TX4_ENA_MASK 0x0008 #define MADERA_AIF2TX4_ENA_SHIFT 3 -#define MADERA_AIF2TX4_ENA_WIDTH 1 #define MADERA_AIF2TX3_ENA 0x0004 #define MADERA_AIF2TX3_ENA_MASK 0x0004 #define MADERA_AIF2TX3_ENA_SHIFT 2 -#define MADERA_AIF2TX3_ENA_WIDTH 1 #define MADERA_AIF2TX2_ENA 0x0002 #define MADERA_AIF2TX2_ENA_MASK 0x0002 #define MADERA_AIF2TX2_ENA_SHIFT 1 -#define MADERA_AIF2TX2_ENA_WIDTH 1 #define MADERA_AIF2TX1_ENA 0x0001 #define MADERA_AIF2TX1_ENA_MASK 0x0001 #define MADERA_AIF2TX1_ENA_SHIFT 0 -#define MADERA_AIF2TX1_ENA_WIDTH 1 /* (0x055A) AIF2_Rx_Enables */ #define MADERA_AIF2RX8_ENA 0x0080 #define MADERA_AIF2RX8_ENA_MASK 0x0080 #define MADERA_AIF2RX8_ENA_SHIFT 7 -#define MADERA_AIF2RX8_ENA_WIDTH 1 #define MADERA_AIF2RX7_ENA 0x0040 #define MADERA_AIF2RX7_ENA_MASK 0x0040 #define MADERA_AIF2RX7_ENA_SHIFT 6 -#define MADERA_AIF2RX7_ENA_WIDTH 1 #define MADERA_AIF2RX6_ENA 0x0020 #define MADERA_AIF2RX6_ENA_MASK 0x0020 #define MADERA_AIF2RX6_ENA_SHIFT 5 -#define MADERA_AIF2RX6_ENA_WIDTH 1 #define MADERA_AIF2RX5_ENA 0x0010 #define MADERA_AIF2RX5_ENA_MASK 0x0010 #define MADERA_AIF2RX5_ENA_SHIFT 4 -#define MADERA_AIF2RX5_ENA_WIDTH 1 #define MADERA_AIF2RX4_ENA 0x0008 #define MADERA_AIF2RX4_ENA_MASK 0x0008 #define MADERA_AIF2RX4_ENA_SHIFT 3 -#define MADERA_AIF2RX4_ENA_WIDTH 1 #define MADERA_AIF2RX3_ENA 0x0004 #define MADERA_AIF2RX3_ENA_MASK 0x0004 #define MADERA_AIF2RX3_ENA_SHIFT 2 -#define MADERA_AIF2RX3_ENA_WIDTH 1 #define MADERA_AIF2RX2_ENA 0x0002 #define MADERA_AIF2RX2_ENA_MASK 0x0002 #define MADERA_AIF2RX2_ENA_SHIFT 1 -#define MADERA_AIF2RX2_ENA_WIDTH 1 #define MADERA_AIF2RX1_ENA 0x0001 #define MADERA_AIF2RX1_ENA_MASK 0x0001 #define MADERA_AIF2RX1_ENA_SHIFT 0 -#define MADERA_AIF2RX1_ENA_WIDTH 1 /* (0x0599) AIF3_Tx_Enables */ #define MADERA_AIF3TX8_ENA 0x0080 #define MADERA_AIF3TX8_ENA_MASK 0x0080 #define MADERA_AIF3TX8_ENA_SHIFT 7 -#define MADERA_AIF3TX8_ENA_WIDTH 1 #define MADERA_AIF3TX7_ENA 0x0040 #define MADERA_AIF3TX7_ENA_MASK 0x0040 #define MADERA_AIF3TX7_ENA_SHIFT 6 -#define MADERA_AIF3TX7_ENA_WIDTH 1 #define MADERA_AIF3TX6_ENA 0x0020 #define MADERA_AIF3TX6_ENA_MASK 0x0020 #define MADERA_AIF3TX6_ENA_SHIFT 5 -#define MADERA_AIF3TX6_ENA_WIDTH 1 #define MADERA_AIF3TX5_ENA 0x0010 #define MADERA_AIF3TX5_ENA_MASK 0x0010 #define MADERA_AIF3TX5_ENA_SHIFT 4 -#define MADERA_AIF3TX5_ENA_WIDTH 1 #define MADERA_AIF3TX4_ENA 0x0008 #define MADERA_AIF3TX4_ENA_MASK 0x0008 #define MADERA_AIF3TX4_ENA_SHIFT 3 -#define MADERA_AIF3TX4_ENA_WIDTH 1 #define MADERA_AIF3TX3_ENA 0x0004 #define MADERA_AIF3TX3_ENA_MASK 0x0004 #define MADERA_AIF3TX3_ENA_SHIFT 2 -#define MADERA_AIF3TX3_ENA_WIDTH 1 #define MADERA_AIF3TX2_ENA 0x0002 #define MADERA_AIF3TX2_ENA_MASK 0x0002 #define MADERA_AIF3TX2_ENA_SHIFT 1 -#define MADERA_AIF3TX2_ENA_WIDTH 1 #define MADERA_AIF3TX1_ENA 0x0001 #define MADERA_AIF3TX1_ENA_MASK 0x0001 #define MADERA_AIF3TX1_ENA_SHIFT 0 -#define MADERA_AIF3TX1_ENA_WIDTH 1 /* (0x059A) AIF3_Rx_Enables */ #define MADERA_AIF3RX8_ENA 0x0080 #define MADERA_AIF3RX8_ENA_MASK 0x0080 #define MADERA_AIF3RX8_ENA_SHIFT 7 -#define MADERA_AIF3RX8_ENA_WIDTH 1 #define MADERA_AIF3RX7_ENA 0x0040 #define MADERA_AIF3RX7_ENA_MASK 0x0040 #define MADERA_AIF3RX7_ENA_SHIFT 6 -#define MADERA_AIF3RX7_ENA_WIDTH 1 #define MADERA_AIF3RX6_ENA 0x0020 #define MADERA_AIF3RX6_ENA_MASK 0x0020 #define MADERA_AIF3RX6_ENA_SHIFT 5 -#define MADERA_AIF3RX6_ENA_WIDTH 1 #define MADERA_AIF3RX5_ENA 0x0010 #define MADERA_AIF3RX5_ENA_MASK 0x0010 #define MADERA_AIF3RX5_ENA_SHIFT 4 -#define MADERA_AIF3RX5_ENA_WIDTH 1 #define MADERA_AIF3RX4_ENA 0x0008 #define MADERA_AIF3RX4_ENA_MASK 0x0008 #define MADERA_AIF3RX4_ENA_SHIFT 3 -#define MADERA_AIF3RX4_ENA_WIDTH 1 #define MADERA_AIF3RX3_ENA 0x0004 #define MADERA_AIF3RX3_ENA_MASK 0x0004 #define MADERA_AIF3RX3_ENA_SHIFT 2 -#define MADERA_AIF3RX3_ENA_WIDTH 1 #define MADERA_AIF3RX2_ENA 0x0002 #define MADERA_AIF3RX2_ENA_MASK 0x0002 #define MADERA_AIF3RX2_ENA_SHIFT 1 -#define MADERA_AIF3RX2_ENA_WIDTH 1 #define MADERA_AIF3RX1_ENA 0x0001 #define MADERA_AIF3RX1_ENA_MASK 0x0001 #define MADERA_AIF3RX1_ENA_SHIFT 0 -#define MADERA_AIF3RX1_ENA_WIDTH 1 /* (0x05B9) AIF4_Tx_Enables */ #define MADERA_AIF4TX2_ENA 0x0002 #define MADERA_AIF4TX2_ENA_MASK 0x0002 #define MADERA_AIF4TX2_ENA_SHIFT 1 -#define MADERA_AIF4TX2_ENA_WIDTH 1 #define MADERA_AIF4TX1_ENA 0x0001 #define MADERA_AIF4TX1_ENA_MASK 0x0001 #define MADERA_AIF4TX1_ENA_SHIFT 0 -#define MADERA_AIF4TX1_ENA_WIDTH 1 /* (0x05BA) AIF4_Rx_Enables */ #define MADERA_AIF4RX2_ENA 0x0002 #define MADERA_AIF4RX2_ENA_MASK 0x0002 #define MADERA_AIF4RX2_ENA_SHIFT 1 -#define MADERA_AIF4RX2_ENA_WIDTH 1 #define MADERA_AIF4RX1_ENA 0x0001 #define MADERA_AIF4RX1_ENA_MASK 0x0001 #define MADERA_AIF4RX1_ENA_SHIFT 0 -#define MADERA_AIF4RX1_ENA_WIDTH 1 /* (0x05C2) SPD1_TX_Control */ #define MADERA_SPD1_VAL2 0x2000 #define MADERA_SPD1_VAL2_MASK 0x2000 #define MADERA_SPD1_VAL2_SHIFT 13 -#define MADERA_SPD1_VAL2_WIDTH 1 #define MADERA_SPD1_VAL1 0x1000 #define MADERA_SPD1_VAL1_MASK 0x1000 #define MADERA_SPD1_VAL1_SHIFT 12 -#define MADERA_SPD1_VAL1_WIDTH 1 #define MADERA_SPD1_RATE_MASK 0x00F0 #define MADERA_SPD1_RATE_SHIFT 4 -#define MADERA_SPD1_RATE_WIDTH 4 #define MADERA_SPD1_ENA 0x0001 #define MADERA_SPD1_ENA_MASK 0x0001 #define MADERA_SPD1_ENA_SHIFT 0 -#define MADERA_SPD1_ENA_WIDTH 1 /* (0x05F5) SLIMbus_RX_Channel_Enable */ #define MADERA_SLIMRX8_ENA 0x0080 #define MADERA_SLIMRX8_ENA_MASK 0x0080 #define MADERA_SLIMRX8_ENA_SHIFT 7 -#define MADERA_SLIMRX8_ENA_WIDTH 1 #define MADERA_SLIMRX7_ENA 0x0040 #define MADERA_SLIMRX7_ENA_MASK 0x0040 #define MADERA_SLIMRX7_ENA_SHIFT 6 -#define MADERA_SLIMRX7_ENA_WIDTH 1 #define MADERA_SLIMRX6_ENA 0x0020 #define MADERA_SLIMRX6_ENA_MASK 0x0020 #define MADERA_SLIMRX6_ENA_SHIFT 5 -#define MADERA_SLIMRX6_ENA_WIDTH 1 #define MADERA_SLIMRX5_ENA 0x0010 #define MADERA_SLIMRX5_ENA_MASK 0x0010 #define MADERA_SLIMRX5_ENA_SHIFT 4 -#define MADERA_SLIMRX5_ENA_WIDTH 1 #define MADERA_SLIMRX4_ENA 0x0008 #define MADERA_SLIMRX4_ENA_MASK 0x0008 #define MADERA_SLIMRX4_ENA_SHIFT 3 -#define MADERA_SLIMRX4_ENA_WIDTH 1 #define MADERA_SLIMRX3_ENA 0x0004 #define MADERA_SLIMRX3_ENA_MASK 0x0004 #define MADERA_SLIMRX3_ENA_SHIFT 2 -#define MADERA_SLIMRX3_ENA_WIDTH 1 #define MADERA_SLIMRX2_ENA 0x0002 #define MADERA_SLIMRX2_ENA_MASK 0x0002 #define MADERA_SLIMRX2_ENA_SHIFT 1 -#define MADERA_SLIMRX2_ENA_WIDTH 1 #define MADERA_SLIMRX1_ENA 0x0001 #define MADERA_SLIMRX1_ENA_MASK 0x0001 #define MADERA_SLIMRX1_ENA_SHIFT 0 -#define MADERA_SLIMRX1_ENA_WIDTH 1 /* (0x05F6) SLIMbus_TX_Channel_Enable */ #define MADERA_SLIMTX8_ENA 0x0080 #define MADERA_SLIMTX8_ENA_MASK 0x0080 #define MADERA_SLIMTX8_ENA_SHIFT 7 -#define MADERA_SLIMTX8_ENA_WIDTH 1 #define MADERA_SLIMTX7_ENA 0x0040 #define MADERA_SLIMTX7_ENA_MASK 0x0040 #define MADERA_SLIMTX7_ENA_SHIFT 6 -#define MADERA_SLIMTX7_ENA_WIDTH 1 #define MADERA_SLIMTX6_ENA 0x0020 #define MADERA_SLIMTX6_ENA_MASK 0x0020 #define MADERA_SLIMTX6_ENA_SHIFT 5 -#define MADERA_SLIMTX6_ENA_WIDTH 1 #define MADERA_SLIMTX5_ENA 0x0010 #define MADERA_SLIMTX5_ENA_MASK 0x0010 #define MADERA_SLIMTX5_ENA_SHIFT 4 -#define MADERA_SLIMTX5_ENA_WIDTH 1 #define MADERA_SLIMTX4_ENA 0x0008 #define MADERA_SLIMTX4_ENA_MASK 0x0008 #define MADERA_SLIMTX4_ENA_SHIFT 3 -#define MADERA_SLIMTX4_ENA_WIDTH 1 #define MADERA_SLIMTX3_ENA 0x0004 #define MADERA_SLIMTX3_ENA_MASK 0x0004 #define MADERA_SLIMTX3_ENA_SHIFT 2 -#define MADERA_SLIMTX3_ENA_WIDTH 1 #define MADERA_SLIMTX2_ENA 0x0002 #define MADERA_SLIMTX2_ENA_MASK 0x0002 #define MADERA_SLIMTX2_ENA_SHIFT 1 -#define MADERA_SLIMTX2_ENA_WIDTH 1 #define MADERA_SLIMTX1_ENA 0x0001 #define MADERA_SLIMTX1_ENA_MASK 0x0001 #define MADERA_SLIMTX1_ENA_SHIFT 0 -#define MADERA_SLIMTX1_ENA_WIDTH 1 /* (0x0E10) EQ1_1 */ #define MADERA_EQ1_B1_GAIN_MASK 0xF800 #define MADERA_EQ1_B1_GAIN_SHIFT 11 -#define MADERA_EQ1_B1_GAIN_WIDTH 5 #define MADERA_EQ1_B2_GAIN_MASK 0x07C0 #define MADERA_EQ1_B2_GAIN_SHIFT 6 -#define MADERA_EQ1_B2_GAIN_WIDTH 5 #define MADERA_EQ1_B3_GAIN_MASK 0x003E #define MADERA_EQ1_B3_GAIN_SHIFT 1 -#define MADERA_EQ1_B3_GAIN_WIDTH 5 #define MADERA_EQ1_ENA 0x0001 #define MADERA_EQ1_ENA_MASK 0x0001 #define MADERA_EQ1_ENA_SHIFT 0 -#define MADERA_EQ1_ENA_WIDTH 1 /* (0x0E11) EQ1_2 */ #define MADERA_EQ1_B4_GAIN_MASK 0xF800 #define MADERA_EQ1_B4_GAIN_SHIFT 11 -#define MADERA_EQ1_B4_GAIN_WIDTH 5 #define MADERA_EQ1_B5_GAIN_MASK 0x07C0 #define MADERA_EQ1_B5_GAIN_SHIFT 6 -#define MADERA_EQ1_B5_GAIN_WIDTH 5 #define MADERA_EQ1_B1_MODE 0x0001 #define MADERA_EQ1_B1_MODE_MASK 0x0001 #define MADERA_EQ1_B1_MODE_SHIFT 0 -#define MADERA_EQ1_B1_MODE_WIDTH 1 /* (0x0E26) EQ2_1 */ #define MADERA_EQ2_B1_GAIN_MASK 0xF800 #define MADERA_EQ2_B1_GAIN_SHIFT 11 -#define MADERA_EQ2_B1_GAIN_WIDTH 5 #define MADERA_EQ2_B2_GAIN_MASK 0x07C0 #define MADERA_EQ2_B2_GAIN_SHIFT 6 -#define MADERA_EQ2_B2_GAIN_WIDTH 5 #define MADERA_EQ2_B3_GAIN_MASK 0x003E #define MADERA_EQ2_B3_GAIN_SHIFT 1 -#define MADERA_EQ2_B3_GAIN_WIDTH 5 #define MADERA_EQ2_ENA 0x0001 #define MADERA_EQ2_ENA_MASK 0x0001 #define MADERA_EQ2_ENA_SHIFT 0 -#define MADERA_EQ2_ENA_WIDTH 1 /* (0x0E27) EQ2_2 */ #define MADERA_EQ2_B4_GAIN_MASK 0xF800 #define MADERA_EQ2_B4_GAIN_SHIFT 11 -#define MADERA_EQ2_B4_GAIN_WIDTH 5 #define MADERA_EQ2_B5_GAIN_MASK 0x07C0 #define MADERA_EQ2_B5_GAIN_SHIFT 6 -#define MADERA_EQ2_B5_GAIN_WIDTH 5 #define MADERA_EQ2_B1_MODE 0x0001 #define MADERA_EQ2_B1_MODE_MASK 0x0001 #define MADERA_EQ2_B1_MODE_SHIFT 0 -#define MADERA_EQ2_B1_MODE_WIDTH 1 /* (0x0E3C) EQ3_1 */ #define MADERA_EQ3_B1_GAIN_MASK 0xF800 #define MADERA_EQ3_B1_GAIN_SHIFT 11 -#define MADERA_EQ3_B1_GAIN_WIDTH 5 #define MADERA_EQ3_B2_GAIN_MASK 0x07C0 #define MADERA_EQ3_B2_GAIN_SHIFT 6 -#define MADERA_EQ3_B2_GAIN_WIDTH 5 #define MADERA_EQ3_B3_GAIN_MASK 0x003E #define MADERA_EQ3_B3_GAIN_SHIFT 1 -#define MADERA_EQ3_B3_GAIN_WIDTH 5 #define MADERA_EQ3_ENA 0x0001 #define MADERA_EQ3_ENA_MASK 0x0001 #define MADERA_EQ3_ENA_SHIFT 0 -#define MADERA_EQ3_ENA_WIDTH 1 /* (0x0E3D) EQ3_2 */ #define MADERA_EQ3_B4_GAIN_MASK 0xF800 #define MADERA_EQ3_B4_GAIN_SHIFT 11 -#define MADERA_EQ3_B4_GAIN_WIDTH 5 #define MADERA_EQ3_B5_GAIN_MASK 0x07C0 #define MADERA_EQ3_B5_GAIN_SHIFT 6 -#define MADERA_EQ3_B5_GAIN_WIDTH 5 #define MADERA_EQ3_B1_MODE 0x0001 #define MADERA_EQ3_B1_MODE_MASK 0x0001 #define MADERA_EQ3_B1_MODE_SHIFT 0 -#define MADERA_EQ3_B1_MODE_WIDTH 1 /* (0x0E52) EQ4_1 */ #define MADERA_EQ4_B1_GAIN_MASK 0xF800 #define MADERA_EQ4_B1_GAIN_SHIFT 11 -#define MADERA_EQ4_B1_GAIN_WIDTH 5 #define MADERA_EQ4_B2_GAIN_MASK 0x07C0 #define MADERA_EQ4_B2_GAIN_SHIFT 6 -#define MADERA_EQ4_B2_GAIN_WIDTH 5 #define MADERA_EQ4_B3_GAIN_MASK 0x003E #define MADERA_EQ4_B3_GAIN_SHIFT 1 -#define MADERA_EQ4_B3_GAIN_WIDTH 5 #define MADERA_EQ4_ENA 0x0001 #define MADERA_EQ4_ENA_MASK 0x0001 #define MADERA_EQ4_ENA_SHIFT 0 -#define MADERA_EQ4_ENA_WIDTH 1 /* (0x0E53) EQ4_2 */ #define MADERA_EQ4_B4_GAIN_MASK 0xF800 #define MADERA_EQ4_B4_GAIN_SHIFT 11 -#define MADERA_EQ4_B4_GAIN_WIDTH 5 #define MADERA_EQ4_B5_GAIN_MASK 0x07C0 #define MADERA_EQ4_B5_GAIN_SHIFT 6 -#define MADERA_EQ4_B5_GAIN_WIDTH 5 #define MADERA_EQ4_B1_MODE 0x0001 #define MADERA_EQ4_B1_MODE_MASK 0x0001 #define MADERA_EQ4_B1_MODE_SHIFT 0 -#define MADERA_EQ4_B1_MODE_WIDTH 1 /* (0x0E80) DRC1_ctrl1 */ #define MADERA_DRC1L_ENA 0x0002 #define MADERA_DRC1L_ENA_MASK 0x0002 #define MADERA_DRC1L_ENA_SHIFT 1 -#define MADERA_DRC1L_ENA_WIDTH 1 #define MADERA_DRC1R_ENA 0x0001 #define MADERA_DRC1R_ENA_MASK 0x0001 #define MADERA_DRC1R_ENA_SHIFT 0 -#define MADERA_DRC1R_ENA_WIDTH 1 /* (0x0E88) DRC2_ctrl1 */ #define MADERA_DRC2L_ENA 0x0002 #define MADERA_DRC2L_ENA_MASK 0x0002 #define MADERA_DRC2L_ENA_SHIFT 1 -#define MADERA_DRC2L_ENA_WIDTH 1 #define MADERA_DRC2R_ENA 0x0001 #define MADERA_DRC2R_ENA_MASK 0x0001 #define MADERA_DRC2R_ENA_SHIFT 0 -#define MADERA_DRC2R_ENA_WIDTH 1 /* (0x0EC0) HPLPF1_1 */ #define MADERA_LHPF1_MODE 0x0002 #define MADERA_LHPF1_MODE_MASK 0x0002 #define MADERA_LHPF1_MODE_SHIFT 1 -#define MADERA_LHPF1_MODE_WIDTH 1 #define MADERA_LHPF1_ENA 0x0001 #define MADERA_LHPF1_ENA_MASK 0x0001 #define MADERA_LHPF1_ENA_SHIFT 0 -#define MADERA_LHPF1_ENA_WIDTH 1 /* (0x0EC1) HPLPF1_2 */ #define MADERA_LHPF1_COEFF_MASK 0xFFFF #define MADERA_LHPF1_COEFF_SHIFT 0 -#define MADERA_LHPF1_COEFF_WIDTH 16 /* (0x0EC4) HPLPF2_1 */ #define MADERA_LHPF2_MODE 0x0002 #define MADERA_LHPF2_MODE_MASK 0x0002 #define MADERA_LHPF2_MODE_SHIFT 1 -#define MADERA_LHPF2_MODE_WIDTH 1 #define MADERA_LHPF2_ENA 0x0001 #define MADERA_LHPF2_ENA_MASK 0x0001 #define MADERA_LHPF2_ENA_SHIFT 0 -#define MADERA_LHPF2_ENA_WIDTH 1 /* (0x0EC5) HPLPF2_2 */ #define MADERA_LHPF2_COEFF_MASK 0xFFFF #define MADERA_LHPF2_COEFF_SHIFT 0 -#define MADERA_LHPF2_COEFF_WIDTH 16 /* (0x0EC8) HPLPF3_1 */ #define MADERA_LHPF3_MODE 0x0002 #define MADERA_LHPF3_MODE_MASK 0x0002 #define MADERA_LHPF3_MODE_SHIFT 1 -#define MADERA_LHPF3_MODE_WIDTH 1 #define MADERA_LHPF3_ENA 0x0001 #define MADERA_LHPF3_ENA_MASK 0x0001 #define MADERA_LHPF3_ENA_SHIFT 0 -#define MADERA_LHPF3_ENA_WIDTH 1 /* (0x0EC9) HPLPF3_2 */ #define MADERA_LHPF3_COEFF_MASK 0xFFFF #define MADERA_LHPF3_COEFF_SHIFT 0 -#define MADERA_LHPF3_COEFF_WIDTH 16 /* (0x0ECC) HPLPF4_1 */ #define MADERA_LHPF4_MODE 0x0002 #define MADERA_LHPF4_MODE_MASK 0x0002 #define MADERA_LHPF4_MODE_SHIFT 1 -#define MADERA_LHPF4_MODE_WIDTH 1 #define MADERA_LHPF4_ENA 0x0001 #define MADERA_LHPF4_ENA_MASK 0x0001 #define MADERA_LHPF4_ENA_SHIFT 0 -#define MADERA_LHPF4_ENA_WIDTH 1 /* (0x0ECD) HPLPF4_2 */ #define MADERA_LHPF4_COEFF_MASK 0xFFFF #define MADERA_LHPF4_COEFF_SHIFT 0 -#define MADERA_LHPF4_COEFF_WIDTH 16 /* (0x0ED0) ASRC2_ENABLE */ #define MADERA_ASRC2_IN2L_ENA 0x0008 #define MADERA_ASRC2_IN2L_ENA_MASK 0x0008 #define MADERA_ASRC2_IN2L_ENA_SHIFT 3 -#define MADERA_ASRC2_IN2L_ENA_WIDTH 1 #define MADERA_ASRC2_IN2R_ENA 0x0004 #define MADERA_ASRC2_IN2R_ENA_MASK 0x0004 #define MADERA_ASRC2_IN2R_ENA_SHIFT 2 -#define MADERA_ASRC2_IN2R_ENA_WIDTH 1 #define MADERA_ASRC2_IN1L_ENA 0x0002 #define MADERA_ASRC2_IN1L_ENA_MASK 0x0002 #define MADERA_ASRC2_IN1L_ENA_SHIFT 1 -#define MADERA_ASRC2_IN1L_ENA_WIDTH 1 #define MADERA_ASRC2_IN1R_ENA 0x0001 #define MADERA_ASRC2_IN1R_ENA_MASK 0x0001 #define MADERA_ASRC2_IN1R_ENA_SHIFT 0 -#define MADERA_ASRC2_IN1R_ENA_WIDTH 1 /* (0x0ED2) ASRC2_RATE1 */ #define MADERA_ASRC2_RATE1_MASK 0xF800 #define MADERA_ASRC2_RATE1_SHIFT 11 -#define MADERA_ASRC2_RATE1_WIDTH 5 /* (0x0ED3) ASRC2_RATE2 */ #define MADERA_ASRC2_RATE2_MASK 0xF800 #define MADERA_ASRC2_RATE2_SHIFT 11 -#define MADERA_ASRC2_RATE2_WIDTH 5 /* (0x0EE0) ASRC1_ENABLE */ #define MADERA_ASRC1_IN2L_ENA 0x0008 #define MADERA_ASRC1_IN2L_ENA_MASK 0x0008 #define MADERA_ASRC1_IN2L_ENA_SHIFT 3 -#define MADERA_ASRC1_IN2L_ENA_WIDTH 1 #define MADERA_ASRC1_IN2R_ENA 0x0004 #define MADERA_ASRC1_IN2R_ENA_MASK 0x0004 #define MADERA_ASRC1_IN2R_ENA_SHIFT 2 -#define MADERA_ASRC1_IN2R_ENA_WIDTH 1 #define MADERA_ASRC1_IN1L_ENA 0x0002 #define MADERA_ASRC1_IN1L_ENA_MASK 0x0002 #define MADERA_ASRC1_IN1L_ENA_SHIFT 1 -#define MADERA_ASRC1_IN1L_ENA_WIDTH 1 #define MADERA_ASRC1_IN1R_ENA 0x0001 #define MADERA_ASRC1_IN1R_ENA_MASK 0x0001 #define MADERA_ASRC1_IN1R_ENA_SHIFT 0 -#define MADERA_ASRC1_IN1R_ENA_WIDTH 1 /* (0x0EE2) ASRC1_RATE1 */ #define MADERA_ASRC1_RATE1_MASK 0xF800 #define MADERA_ASRC1_RATE1_SHIFT 11 -#define MADERA_ASRC1_RATE1_WIDTH 5 /* (0x0EE3) ASRC1_RATE2 */ #define MADERA_ASRC1_RATE2_MASK 0xF800 #define MADERA_ASRC1_RATE2_SHIFT 11 -#define MADERA_ASRC1_RATE2_WIDTH 5 /* (0x0EF0) - ISRC1 CTRL 1 */ #define MADERA_ISRC1_FSH_MASK 0xF800 #define MADERA_ISRC1_FSH_SHIFT 11 -#define MADERA_ISRC1_FSH_WIDTH 5 #define MADERA_ISRC1_CLK_SEL_MASK 0x0700 #define MADERA_ISRC1_CLK_SEL_SHIFT 8 -#define MADERA_ISRC1_CLK_SEL_WIDTH 3 /* (0x0EF1) ISRC1_CTRL_2 */ #define MADERA_ISRC1_FSL_MASK 0xF800 #define MADERA_ISRC1_FSL_SHIFT 11 -#define MADERA_ISRC1_FSL_WIDTH 5 /* (0x0EF2) ISRC1_CTRL_3 */ #define MADERA_ISRC1_INT1_ENA 0x8000 #define MADERA_ISRC1_INT1_ENA_MASK 0x8000 #define MADERA_ISRC1_INT1_ENA_SHIFT 15 -#define MADERA_ISRC1_INT1_ENA_WIDTH 1 #define MADERA_ISRC1_INT2_ENA 0x4000 #define MADERA_ISRC1_INT2_ENA_MASK 0x4000 #define MADERA_ISRC1_INT2_ENA_SHIFT 14 -#define MADERA_ISRC1_INT2_ENA_WIDTH 1 #define MADERA_ISRC1_INT3_ENA 0x2000 #define MADERA_ISRC1_INT3_ENA_MASK 0x2000 #define MADERA_ISRC1_INT3_ENA_SHIFT 13 -#define MADERA_ISRC1_INT3_ENA_WIDTH 1 #define MADERA_ISRC1_INT4_ENA 0x1000 #define MADERA_ISRC1_INT4_ENA_MASK 0x1000 #define MADERA_ISRC1_INT4_ENA_SHIFT 12 -#define MADERA_ISRC1_INT4_ENA_WIDTH 1 #define MADERA_ISRC1_DEC1_ENA 0x0200 #define MADERA_ISRC1_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC1_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC1_DEC1_ENA_WIDTH 1 #define MADERA_ISRC1_DEC2_ENA 0x0100 #define MADERA_ISRC1_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC1_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC1_DEC2_ENA_WIDTH 1 #define MADERA_ISRC1_DEC3_ENA 0x0080 #define MADERA_ISRC1_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC1_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC1_DEC3_ENA_WIDTH 1 #define MADERA_ISRC1_DEC4_ENA 0x0040 #define MADERA_ISRC1_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC1_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC1_DEC4_ENA_WIDTH 1 #define MADERA_ISRC1_NOTCH_ENA 0x0001 #define MADERA_ISRC1_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC1_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC1_NOTCH_ENA_WIDTH 1 /* (0x0EF3) ISRC2_CTRL_1 */ #define MADERA_ISRC2_FSH_MASK 0xF800 #define MADERA_ISRC2_FSH_SHIFT 11 -#define MADERA_ISRC2_FSH_WIDTH 5 #define MADERA_ISRC2_CLK_SEL_MASK 0x0700 #define MADERA_ISRC2_CLK_SEL_SHIFT 8 -#define MADERA_ISRC2_CLK_SEL_WIDTH 3 /* (0x0EF4) ISRC2_CTRL_2 */ #define MADERA_ISRC2_FSL_MASK 0xF800 #define MADERA_ISRC2_FSL_SHIFT 11 -#define MADERA_ISRC2_FSL_WIDTH 5 /* (0x0EF5) ISRC2_CTRL_3 */ #define MADERA_ISRC2_INT1_ENA 0x8000 #define MADERA_ISRC2_INT1_ENA_MASK 0x8000 #define MADERA_ISRC2_INT1_ENA_SHIFT 15 -#define MADERA_ISRC2_INT1_ENA_WIDTH 1 #define MADERA_ISRC2_INT2_ENA 0x4000 #define MADERA_ISRC2_INT2_ENA_MASK 0x4000 #define MADERA_ISRC2_INT2_ENA_SHIFT 14 -#define MADERA_ISRC2_INT2_ENA_WIDTH 1 #define MADERA_ISRC2_INT3_ENA 0x2000 #define MADERA_ISRC2_INT3_ENA_MASK 0x2000 #define MADERA_ISRC2_INT3_ENA_SHIFT 13 -#define MADERA_ISRC2_INT3_ENA_WIDTH 1 #define MADERA_ISRC2_INT4_ENA 0x1000 #define MADERA_ISRC2_INT4_ENA_MASK 0x1000 #define MADERA_ISRC2_INT4_ENA_SHIFT 12 -#define MADERA_ISRC2_INT4_ENA_WIDTH 1 #define MADERA_ISRC2_DEC1_ENA 0x0200 #define MADERA_ISRC2_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC2_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC2_DEC1_ENA_WIDTH 1 #define MADERA_ISRC2_DEC2_ENA 0x0100 #define MADERA_ISRC2_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC2_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC2_DEC2_ENA_WIDTH 1 #define MADERA_ISRC2_DEC3_ENA 0x0080 #define MADERA_ISRC2_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC2_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC2_DEC3_ENA_WIDTH 1 #define MADERA_ISRC2_DEC4_ENA 0x0040 #define MADERA_ISRC2_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC2_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC2_DEC4_ENA_WIDTH 1 #define MADERA_ISRC2_NOTCH_ENA 0x0001 #define MADERA_ISRC2_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC2_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC2_NOTCH_ENA_WIDTH 1 /* (0x0EF6) ISRC3_CTRL_1 */ #define MADERA_ISRC3_FSH_MASK 0xF800 #define MADERA_ISRC3_FSH_SHIFT 11 -#define MADERA_ISRC3_FSH_WIDTH 5 #define MADERA_ISRC3_CLK_SEL_MASK 0x0700 #define MADERA_ISRC3_CLK_SEL_SHIFT 8 -#define MADERA_ISRC3_CLK_SEL_WIDTH 3 /* (0x0EF7) ISRC3_CTRL_2 */ #define MADERA_ISRC3_FSL_MASK 0xF800 #define MADERA_ISRC3_FSL_SHIFT 11 -#define MADERA_ISRC3_FSL_WIDTH 5 /* (0x0EF8) ISRC3_CTRL_3 */ #define MADERA_ISRC3_INT1_ENA 0x8000 #define MADERA_ISRC3_INT1_ENA_MASK 0x8000 #define MADERA_ISRC3_INT1_ENA_SHIFT 15 -#define MADERA_ISRC3_INT1_ENA_WIDTH 1 #define MADERA_ISRC3_INT2_ENA 0x4000 #define MADERA_ISRC3_INT2_ENA_MASK 0x4000 #define MADERA_ISRC3_INT2_ENA_SHIFT 14 -#define MADERA_ISRC3_INT2_ENA_WIDTH 1 #define MADERA_ISRC3_INT3_ENA 0x2000 #define MADERA_ISRC3_INT3_ENA_MASK 0x2000 #define MADERA_ISRC3_INT3_ENA_SHIFT 13 -#define MADERA_ISRC3_INT3_ENA_WIDTH 1 #define MADERA_ISRC3_INT4_ENA 0x1000 #define MADERA_ISRC3_INT4_ENA_MASK 0x1000 #define MADERA_ISRC3_INT4_ENA_SHIFT 12 -#define MADERA_ISRC3_INT4_ENA_WIDTH 1 #define MADERA_ISRC3_DEC1_ENA 0x0200 #define MADERA_ISRC3_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC3_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC3_DEC1_ENA_WIDTH 1 #define MADERA_ISRC3_DEC2_ENA 0x0100 #define MADERA_ISRC3_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC3_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC3_DEC2_ENA_WIDTH 1 #define MADERA_ISRC3_DEC3_ENA 0x0080 #define MADERA_ISRC3_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC3_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC3_DEC3_ENA_WIDTH 1 #define MADERA_ISRC3_DEC4_ENA 0x0040 #define MADERA_ISRC3_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC3_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC3_DEC4_ENA_WIDTH 1 #define MADERA_ISRC3_NOTCH_ENA 0x0001 #define MADERA_ISRC3_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC3_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC3_NOTCH_ENA_WIDTH 1 /* (0x0EF9) ISRC4_CTRL_1 */ #define MADERA_ISRC4_FSH_MASK 0xF800 #define MADERA_ISRC4_FSH_SHIFT 11 -#define MADERA_ISRC4_FSH_WIDTH 5 #define MADERA_ISRC4_CLK_SEL_MASK 0x0700 #define MADERA_ISRC4_CLK_SEL_SHIFT 8 -#define MADERA_ISRC4_CLK_SEL_WIDTH 3 /* (0x0EFA) ISRC4_CTRL_2 */ #define MADERA_ISRC4_FSL_MASK 0xF800 #define MADERA_ISRC4_FSL_SHIFT 11 -#define MADERA_ISRC4_FSL_WIDTH 5 /* (0x0EFB) ISRC4_CTRL_3 */ #define MADERA_ISRC4_INT1_ENA 0x8000 #define MADERA_ISRC4_INT1_ENA_MASK 0x8000 #define MADERA_ISRC4_INT1_ENA_SHIFT 15 -#define MADERA_ISRC4_INT1_ENA_WIDTH 1 #define MADERA_ISRC4_INT2_ENA 0x4000 #define MADERA_ISRC4_INT2_ENA_MASK 0x4000 #define MADERA_ISRC4_INT2_ENA_SHIFT 14 -#define MADERA_ISRC4_INT2_ENA_WIDTH 1 #define MADERA_ISRC4_INT3_ENA 0x2000 #define MADERA_ISRC4_INT3_ENA_MASK 0x2000 #define MADERA_ISRC4_INT3_ENA_SHIFT 13 -#define MADERA_ISRC4_INT3_ENA_WIDTH 1 #define MADERA_ISRC4_INT4_ENA 0x1000 #define MADERA_ISRC4_INT4_ENA_MASK 0x1000 #define MADERA_ISRC4_INT4_ENA_SHIFT 12 -#define MADERA_ISRC4_INT4_ENA_WIDTH 1 #define MADERA_ISRC4_DEC1_ENA 0x0200 #define MADERA_ISRC4_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC4_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC4_DEC1_ENA_WIDTH 1 #define MADERA_ISRC4_DEC2_ENA 0x0100 #define MADERA_ISRC4_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC4_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC4_DEC2_ENA_WIDTH 1 #define MADERA_ISRC4_DEC3_ENA 0x0080 #define MADERA_ISRC4_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC4_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC4_DEC3_ENA_WIDTH 1 #define MADERA_ISRC4_DEC4_ENA 0x0040 #define MADERA_ISRC4_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC4_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC4_DEC4_ENA_WIDTH 1 #define MADERA_ISRC4_NOTCH_ENA 0x0001 #define MADERA_ISRC4_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC4_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC4_NOTCH_ENA_WIDTH 1 /* (0x0F00) Clock_Control */ #define MADERA_EXT_NG_SEL_CLR 0x0080 #define MADERA_EXT_NG_SEL_CLR_MASK 0x0080 #define MADERA_EXT_NG_SEL_CLR_SHIFT 7 -#define MADERA_EXT_NG_SEL_CLR_WIDTH 1 #define MADERA_EXT_NG_SEL_SET 0x0040 #define MADERA_EXT_NG_SEL_SET_MASK 0x0040 #define MADERA_EXT_NG_SEL_SET_SHIFT 6 -#define MADERA_EXT_NG_SEL_SET_WIDTH 1 #define MADERA_CLK_R_ENA_CLR 0x0020 #define MADERA_CLK_R_ENA_CLR_MASK 0x0020 #define MADERA_CLK_R_ENA_CLR_SHIFT 5 -#define MADERA_CLK_R_ENA_CLR_WIDTH 1 #define MADERA_CLK_R_ENA_SET 0x0010 #define MADERA_CLK_R_ENA_SET_MASK 0x0010 #define MADERA_CLK_R_ENA_SET_SHIFT 4 -#define MADERA_CLK_R_ENA_SET_WIDTH 1 #define MADERA_CLK_NG_ENA_CLR 0x0008 #define MADERA_CLK_NG_ENA_CLR_MASK 0x0008 #define MADERA_CLK_NG_ENA_CLR_SHIFT 3 -#define MADERA_CLK_NG_ENA_CLR_WIDTH 1 #define MADERA_CLK_NG_ENA_SET 0x0004 #define MADERA_CLK_NG_ENA_SET_MASK 0x0004 #define MADERA_CLK_NG_ENA_SET_SHIFT 2 -#define MADERA_CLK_NG_ENA_SET_WIDTH 1 #define MADERA_CLK_L_ENA_CLR 0x0002 #define MADERA_CLK_L_ENA_CLR_MASK 0x0002 #define MADERA_CLK_L_ENA_CLR_SHIFT 1 -#define MADERA_CLK_L_ENA_CLR_WIDTH 1 #define MADERA_CLK_L_ENA_SET 0x0001 #define MADERA_CLK_L_ENA_SET_MASK 0x0001 #define MADERA_CLK_L_ENA_SET_SHIFT 0 -#define MADERA_CLK_L_ENA_SET_WIDTH 1 /* (0x0F01) ANC_SRC */ #define MADERA_IN_RXANCR_SEL_MASK 0x0070 #define MADERA_IN_RXANCR_SEL_SHIFT 4 -#define MADERA_IN_RXANCR_SEL_WIDTH 3 #define MADERA_IN_RXANCL_SEL_MASK 0x0007 #define MADERA_IN_RXANCL_SEL_SHIFT 0 -#define MADERA_IN_RXANCL_SEL_WIDTH 3 /* (0x0F17) FCL_ADC_reformatter_control */ #define MADERA_FCL_MIC_MODE_SEL 0x000C #define MADERA_FCL_MIC_MODE_SEL_SHIFT 2 -#define MADERA_FCL_MIC_MODE_SEL_WIDTH 2 /* (0x0F73) FCR_ADC_reformatter_control */ #define MADERA_FCR_MIC_MODE_SEL 0x000C #define MADERA_FCR_MIC_MODE_SEL_SHIFT 2 -#define MADERA_FCR_MIC_MODE_SEL_WIDTH 2 /* (0x10C0) AUXPDM1_CTRL_0 */ #define MADERA_AUXPDM1_SRC_MASK 0x0F00 #define MADERA_AUXPDM1_SRC_SHIFT 8 -#define MADERA_AUXPDM1_SRC_WIDTH 4 #define MADERA_AUXPDM1_TXEDGE_MASK 0x0010 #define MADERA_AUXPDM1_TXEDGE_SHIFT 4 -#define MADERA_AUXPDM1_TXEDGE_WIDTH 1 #define MADERA_AUXPDM1_MSTR_MASK 0x0008 #define MADERA_AUXPDM1_MSTR_SHIFT 3 -#define MADERA_AUXPDM1_MSTR_WIDTH 1 #define MADERA_AUXPDM1_ENABLE_MASK 0x0001 #define MADERA_AUXPDM1_ENABLE_SHIFT 0 -#define MADERA_AUXPDM1_ENABLE_WIDTH 1 /* (0x10C1) AUXPDM1_CTRL_1 */ #define MADERA_AUXPDM1_CLK_FREQ_MASK 0xC000 #define MADERA_AUXPDM1_CLK_FREQ_SHIFT 14 -#define MADERA_AUXPDM1_CLK_FREQ_WIDTH 2 /* (0x1480) DFC1_CTRL_W0 */ #define MADERA_DFC1_RATE_MASK 0x007C #define MADERA_DFC1_RATE_SHIFT 2 -#define MADERA_DFC1_RATE_WIDTH 5 #define MADERA_DFC1_DITH_ENA 0x0002 #define MADERA_DFC1_DITH_ENA_MASK 0x0002 #define MADERA_DFC1_DITH_ENA_SHIFT 1 -#define MADERA_DFC1_DITH_ENA_WIDTH 1 #define MADERA_DFC1_ENA 0x0001 #define MADERA_DFC1_ENA_MASK 0x0001 #define MADERA_DFC1_ENA_SHIFT 0 -#define MADERA_DFC1_ENA_WIDTH 1 /* (0x1482) DFC1_RX_W0 */ #define MADERA_DFC1_RX_DATA_WIDTH_MASK 0x1F00 #define MADERA_DFC1_RX_DATA_WIDTH_SHIFT 8 -#define MADERA_DFC1_RX_DATA_WIDTH_WIDTH 5 #define MADERA_DFC1_RX_DATA_TYPE_MASK 0x0007 #define MADERA_DFC1_RX_DATA_TYPE_SHIFT 0 -#define MADERA_DFC1_RX_DATA_TYPE_WIDTH 3 /* (0x1484) DFC1_TX_W0 */ #define MADERA_DFC1_TX_DATA_WIDTH_MASK 0x1F00 #define MADERA_DFC1_TX_DATA_WIDTH_SHIFT 8 -#define MADERA_DFC1_TX_DATA_WIDTH_WIDTH 5 #define MADERA_DFC1_TX_DATA_TYPE_MASK 0x0007 #define MADERA_DFC1_TX_DATA_TYPE_SHIFT 0 -#define MADERA_DFC1_TX_DATA_TYPE_WIDTH 3 /* (0x1600) ADSP2_IRQ0 */ #define MADERA_DSP_IRQ2 0x0002 @@ -3636,449 +3103,347 @@ #define MADERA_GP1_LVL 0x8000 #define MADERA_GP1_LVL_MASK 0x8000 #define MADERA_GP1_LVL_SHIFT 15 -#define MADERA_GP1_LVL_WIDTH 1 #define MADERA_GP1_OP_CFG 0x4000 #define MADERA_GP1_OP_CFG_MASK 0x4000 #define MADERA_GP1_OP_CFG_SHIFT 14 -#define MADERA_GP1_OP_CFG_WIDTH 1 #define MADERA_GP1_DB 0x2000 #define MADERA_GP1_DB_MASK 0x2000 #define MADERA_GP1_DB_SHIFT 13 -#define MADERA_GP1_DB_WIDTH 1 #define MADERA_GP1_POL 0x1000 #define MADERA_GP1_POL_MASK 0x1000 #define MADERA_GP1_POL_SHIFT 12 -#define MADERA_GP1_POL_WIDTH 1 #define MADERA_GP1_IP_CFG 0x0800 #define MADERA_GP1_IP_CFG_MASK 0x0800 #define MADERA_GP1_IP_CFG_SHIFT 11 -#define MADERA_GP1_IP_CFG_WIDTH 1 #define MADERA_GP1_FN_MASK 0x03FF #define MADERA_GP1_FN_SHIFT 0 -#define MADERA_GP1_FN_WIDTH 10 /* (0x1701) GPIO1_CTRL_2 */ #define MADERA_GP1_DIR 0x8000 #define MADERA_GP1_DIR_MASK 0x8000 #define MADERA_GP1_DIR_SHIFT 15 -#define MADERA_GP1_DIR_WIDTH 1 #define MADERA_GP1_PU 0x4000 #define MADERA_GP1_PU_MASK 0x4000 #define MADERA_GP1_PU_SHIFT 14 -#define MADERA_GP1_PU_WIDTH 1 #define MADERA_GP1_PD 0x2000 #define MADERA_GP1_PD_MASK 0x2000 #define MADERA_GP1_PD_SHIFT 13 -#define MADERA_GP1_PD_WIDTH 1 #define MADERA_GP1_DRV_STR_MASK 0x1800 #define MADERA_GP1_DRV_STR_SHIFT 11 -#define MADERA_GP1_DRV_STR_WIDTH 2 /* (0x1800) IRQ1_Status_1 */ #define MADERA_CTRLIF_ERR_EINT1 0x1000 #define MADERA_CTRLIF_ERR_EINT1_MASK 0x1000 #define MADERA_CTRLIF_ERR_EINT1_SHIFT 12 -#define MADERA_CTRLIF_ERR_EINT1_WIDTH 1 #define MADERA_SYSCLK_FAIL_EINT1 0x0200 #define MADERA_SYSCLK_FAIL_EINT1_MASK 0x0200 #define MADERA_SYSCLK_FAIL_EINT1_SHIFT 9 -#define MADERA_SYSCLK_FAIL_EINT1_WIDTH 1 #define MADERA_CLOCK_DETECT_EINT1 0x0100 #define MADERA_CLOCK_DETECT_EINT1_MASK 0x0100 #define MADERA_CLOCK_DETECT_EINT1_SHIFT 8 -#define MADERA_CLOCK_DETECT_EINT1_WIDTH 1 #define MADERA_BOOT_DONE_EINT1 0x0080 #define MADERA_BOOT_DONE_EINT1_MASK 0x0080 #define MADERA_BOOT_DONE_EINT1_SHIFT 7 -#define MADERA_BOOT_DONE_EINT1_WIDTH 1 /* (0x1801) IRQ1_Status_2 */ #define MADERA_FLLAO_LOCK_EINT1 0x0800 #define MADERA_FLLAO_LOCK_EINT1_MASK 0x0800 #define MADERA_FLLAO_LOCK_EINT1_SHIFT 11 -#define MADERA_FLLAO_LOCK_EINT1_WIDTH 1 #define MADERA_FLL3_LOCK_EINT1 0x0400 #define MADERA_FLL3_LOCK_EINT1_MASK 0x0400 #define MADERA_FLL3_LOCK_EINT1_SHIFT 10 -#define MADERA_FLL3_LOCK_EINT1_WIDTH 1 #define MADERA_FLL2_LOCK_EINT1 0x0200 #define MADERA_FLL2_LOCK_EINT1_MASK 0x0200 #define MADERA_FLL2_LOCK_EINT1_SHIFT 9 -#define MADERA_FLL2_LOCK_EINT1_WIDTH 1 #define MADERA_FLL1_LOCK_EINT1 0x0100 #define MADERA_FLL1_LOCK_EINT1_MASK 0x0100 #define MADERA_FLL1_LOCK_EINT1_SHIFT 8 -#define MADERA_FLL1_LOCK_EINT1_WIDTH 1 /* (0x1805) IRQ1_Status_6 */ #define MADERA_MICDET2_EINT1 0x0200 #define MADERA_MICDET2_EINT1_MASK 0x0200 #define MADERA_MICDET2_EINT1_SHIFT 9 -#define MADERA_MICDET2_EINT1_WIDTH 1 #define MADERA_MICDET1_EINT1 0x0100 #define MADERA_MICDET1_EINT1_MASK 0x0100 #define MADERA_MICDET1_EINT1_SHIFT 8 -#define MADERA_MICDET1_EINT1_WIDTH 1 #define MADERA_HPDET_EINT1 0x0001 #define MADERA_HPDET_EINT1_MASK 0x0001 #define MADERA_HPDET_EINT1_SHIFT 0 -#define MADERA_HPDET_EINT1_WIDTH 1 /* (0x1806) IRQ1_Status_7 */ #define MADERA_MICD_CLAMP_FALL_EINT1 0x0020 #define MADERA_MICD_CLAMP_FALL_EINT1_MASK 0x0020 #define MADERA_MICD_CLAMP_FALL_EINT1_SHIFT 5 -#define MADERA_MICD_CLAMP_FALL_EINT1_WIDTH 1 #define MADERA_MICD_CLAMP_RISE_EINT1 0x0010 #define MADERA_MICD_CLAMP_RISE_EINT1_MASK 0x0010 #define MADERA_MICD_CLAMP_RISE_EINT1_SHIFT 4 -#define MADERA_MICD_CLAMP_RISE_EINT1_WIDTH 1 #define MADERA_JD2_FALL_EINT1 0x0008 #define MADERA_JD2_FALL_EINT1_MASK 0x0008 #define MADERA_JD2_FALL_EINT1_SHIFT 3 -#define MADERA_JD2_FALL_EINT1_WIDTH 1 #define MADERA_JD2_RISE_EINT1 0x0004 #define MADERA_JD2_RISE_EINT1_MASK 0x0004 #define MADERA_JD2_RISE_EINT1_SHIFT 2 -#define MADERA_JD2_RISE_EINT1_WIDTH 1 #define MADERA_JD1_FALL_EINT1 0x0002 #define MADERA_JD1_FALL_EINT1_MASK 0x0002 #define MADERA_JD1_FALL_EINT1_SHIFT 1 -#define MADERA_JD1_FALL_EINT1_WIDTH 1 #define MADERA_JD1_RISE_EINT1 0x0001 #define MADERA_JD1_RISE_EINT1_MASK 0x0001 #define MADERA_JD1_RISE_EINT1_SHIFT 0 -#define MADERA_JD1_RISE_EINT1_WIDTH 1 /* (0x1808) IRQ1_Status_9 */ #define MADERA_ASRC2_IN2_LOCK_EINT1 0x0800 #define MADERA_ASRC2_IN2_LOCK_EINT1_MASK 0x0800 #define MADERA_ASRC2_IN2_LOCK_EINT1_SHIFT 11 -#define MADERA_ASRC2_IN2_LOCK_EINT1_WIDTH 1 #define MADERA_ASRC2_IN1_LOCK_EINT1 0x0400 #define MADERA_ASRC2_IN1_LOCK_EINT1_MASK 0x0400 #define MADERA_ASRC2_IN1_LOCK_EINT1_SHIFT 10 -#define MADERA_ASRC2_IN1_LOCK_EINT1_WIDTH 1 #define MADERA_ASRC1_IN2_LOCK_EINT1 0x0200 #define MADERA_ASRC1_IN2_LOCK_EINT1_MASK 0x0200 #define MADERA_ASRC1_IN2_LOCK_EINT1_SHIFT 9 -#define MADERA_ASRC1_IN2_LOCK_EINT1_WIDTH 1 #define MADERA_ASRC1_IN1_LOCK_EINT1 0x0100 #define MADERA_ASRC1_IN1_LOCK_EINT1_MASK 0x0100 #define MADERA_ASRC1_IN1_LOCK_EINT1_SHIFT 8 -#define MADERA_ASRC1_IN1_LOCK_EINT1_WIDTH 1 #define MADERA_DRC2_SIG_DET_EINT1 0x0002 #define MADERA_DRC2_SIG_DET_EINT1_MASK 0x0002 #define MADERA_DRC2_SIG_DET_EINT1_SHIFT 1 -#define MADERA_DRC2_SIG_DET_EINT1_WIDTH 1 #define MADERA_DRC1_SIG_DET_EINT1 0x0001 #define MADERA_DRC1_SIG_DET_EINT1_MASK 0x0001 #define MADERA_DRC1_SIG_DET_EINT1_SHIFT 0 -#define MADERA_DRC1_SIG_DET_EINT1_WIDTH 1 /* (0x180A) IRQ1_Status_11 */ #define MADERA_DSP_IRQ16_EINT1 0x8000 #define MADERA_DSP_IRQ16_EINT1_MASK 0x8000 #define MADERA_DSP_IRQ16_EINT1_SHIFT 15 -#define MADERA_DSP_IRQ16_EINT1_WIDTH 1 #define MADERA_DSP_IRQ15_EINT1 0x4000 #define MADERA_DSP_IRQ15_EINT1_MASK 0x4000 #define MADERA_DSP_IRQ15_EINT1_SHIFT 14 -#define MADERA_DSP_IRQ15_EINT1_WIDTH 1 #define MADERA_DSP_IRQ14_EINT1 0x2000 #define MADERA_DSP_IRQ14_EINT1_MASK 0x2000 #define MADERA_DSP_IRQ14_EINT1_SHIFT 13 -#define MADERA_DSP_IRQ14_EINT1_WIDTH 1 #define MADERA_DSP_IRQ13_EINT1 0x1000 #define MADERA_DSP_IRQ13_EINT1_MASK 0x1000 #define MADERA_DSP_IRQ13_EINT1_SHIFT 12 -#define MADERA_DSP_IRQ13_EINT1_WIDTH 1 #define MADERA_DSP_IRQ12_EINT1 0x0800 #define MADERA_DSP_IRQ12_EINT1_MASK 0x0800 #define MADERA_DSP_IRQ12_EINT1_SHIFT 11 -#define MADERA_DSP_IRQ12_EINT1_WIDTH 1 #define MADERA_DSP_IRQ11_EINT1 0x0400 #define MADERA_DSP_IRQ11_EINT1_MASK 0x0400 #define MADERA_DSP_IRQ11_EINT1_SHIFT 10 -#define MADERA_DSP_IRQ11_EINT1_WIDTH 1 #define MADERA_DSP_IRQ10_EINT1 0x0200 #define MADERA_DSP_IRQ10_EINT1_MASK 0x0200 #define MADERA_DSP_IRQ10_EINT1_SHIFT 9 -#define MADERA_DSP_IRQ10_EINT1_WIDTH 1 #define MADERA_DSP_IRQ9_EINT1 0x0100 #define MADERA_DSP_IRQ9_EINT1_MASK 0x0100 #define MADERA_DSP_IRQ9_EINT1_SHIFT 8 -#define MADERA_DSP_IRQ9_EINT1_WIDTH 1 #define MADERA_DSP_IRQ8_EINT1 0x0080 #define MADERA_DSP_IRQ8_EINT1_MASK 0x0080 #define MADERA_DSP_IRQ8_EINT1_SHIFT 7 -#define MADERA_DSP_IRQ8_EINT1_WIDTH 1 #define MADERA_DSP_IRQ7_EINT1 0x0040 #define MADERA_DSP_IRQ7_EINT1_MASK 0x0040 #define MADERA_DSP_IRQ7_EINT1_SHIFT 6 -#define MADERA_DSP_IRQ7_EINT1_WIDTH 1 #define MADERA_DSP_IRQ6_EINT1 0x0020 #define MADERA_DSP_IRQ6_EINT1_MASK 0x0020 #define MADERA_DSP_IRQ6_EINT1_SHIFT 5 -#define MADERA_DSP_IRQ6_EINT1_WIDTH 1 #define MADERA_DSP_IRQ5_EINT1 0x0010 #define MADERA_DSP_IRQ5_EINT1_MASK 0x0010 #define MADERA_DSP_IRQ5_EINT1_SHIFT 4 -#define MADERA_DSP_IRQ5_EINT1_WIDTH 1 #define MADERA_DSP_IRQ4_EINT1 0x0008 #define MADERA_DSP_IRQ4_EINT1_MASK 0x0008 #define MADERA_DSP_IRQ4_EINT1_SHIFT 3 -#define MADERA_DSP_IRQ4_EINT1_WIDTH 1 #define MADERA_DSP_IRQ3_EINT1 0x0004 #define MADERA_DSP_IRQ3_EINT1_MASK 0x0004 #define MADERA_DSP_IRQ3_EINT1_SHIFT 2 -#define MADERA_DSP_IRQ3_EINT1_WIDTH 1 #define MADERA_DSP_IRQ2_EINT1 0x0002 #define MADERA_DSP_IRQ2_EINT1_MASK 0x0002 #define MADERA_DSP_IRQ2_EINT1_SHIFT 1 -#define MADERA_DSP_IRQ2_EINT1_WIDTH 1 #define MADERA_DSP_IRQ1_EINT1 0x0001 #define MADERA_DSP_IRQ1_EINT1_MASK 0x0001 #define MADERA_DSP_IRQ1_EINT1_SHIFT 0 -#define MADERA_DSP_IRQ1_EINT1_WIDTH 1 /* (0x180B) IRQ1_Status_12 */ #define MADERA_SPKOUTR_SC_EINT1 0x0080 #define MADERA_SPKOUTR_SC_EINT1_MASK 0x0080 #define MADERA_SPKOUTR_SC_EINT1_SHIFT 7 -#define MADERA_SPKOUTR_SC_EINT1_WIDTH 1 #define MADERA_SPKOUTL_SC_EINT1 0x0040 #define MADERA_SPKOUTL_SC_EINT1_MASK 0x0040 #define MADERA_SPKOUTL_SC_EINT1_SHIFT 6 -#define MADERA_SPKOUTL_SC_EINT1_WIDTH 1 #define MADERA_HP3R_SC_EINT1 0x0020 #define MADERA_HP3R_SC_EINT1_MASK 0x0020 #define MADERA_HP3R_SC_EINT1_SHIFT 5 -#define MADERA_HP3R_SC_EINT1_WIDTH 1 #define MADERA_HP3L_SC_EINT1 0x0010 #define MADERA_HP3L_SC_EINT1_MASK 0x0010 #define MADERA_HP3L_SC_EINT1_SHIFT 4 -#define MADERA_HP3L_SC_EINT1_WIDTH 1 #define MADERA_HP2R_SC_EINT1 0x0008 #define MADERA_HP2R_SC_EINT1_MASK 0x0008 #define MADERA_HP2R_SC_EINT1_SHIFT 3 -#define MADERA_HP2R_SC_EINT1_WIDTH 1 #define MADERA_HP2L_SC_EINT1 0x0004 #define MADERA_HP2L_SC_EINT1_MASK 0x0004 #define MADERA_HP2L_SC_EINT1_SHIFT 2 -#define MADERA_HP2L_SC_EINT1_WIDTH 1 #define MADERA_HP1R_SC_EINT1 0x0002 #define MADERA_HP1R_SC_EINT1_MASK 0x0002 #define MADERA_HP1R_SC_EINT1_SHIFT 1 -#define MADERA_HP1R_SC_EINT1_WIDTH 1 #define MADERA_HP1L_SC_EINT1 0x0001 #define MADERA_HP1L_SC_EINT1_MASK 0x0001 #define MADERA_HP1L_SC_EINT1_SHIFT 0 -#define MADERA_HP1L_SC_EINT1_WIDTH 1 /* (0x180E) IRQ1_Status_15 */ #define MADERA_SPK_OVERHEAT_WARN_EINT1 0x0004 #define MADERA_SPK_OVERHEAT_WARN_EINT1_MASK 0x0004 #define MADERA_SPK_OVERHEAT_WARN_EINT1_SHIFT 2 -#define MADERA_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 #define MADERA_SPK_OVERHEAT_EINT1 0x0002 #define MADERA_SPK_OVERHEAT_EINT1_MASK 0x0002 #define MADERA_SPK_OVERHEAT_EINT1_SHIFT 1 -#define MADERA_SPK_OVERHEAT_EINT1_WIDTH 1 #define MADERA_SPK_SHUTDOWN_EINT1 0x0001 #define MADERA_SPK_SHUTDOWN_EINT1_MASK 0x0001 #define MADERA_SPK_SHUTDOWN_EINT1_SHIFT 0 -#define MADERA_SPK_SHUTDOWN_EINT1_WIDTH 1 /* (0x1820) - IRQ1 Status 33 */ #define MADERA_DSP7_BUS_ERR_EINT1 0x0040 #define MADERA_DSP7_BUS_ERR_EINT1_MASK 0x0040 #define MADERA_DSP7_BUS_ERR_EINT1_SHIFT 6 -#define MADERA_DSP7_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP6_BUS_ERR_EINT1 0x0020 #define MADERA_DSP6_BUS_ERR_EINT1_MASK 0x0020 #define MADERA_DSP6_BUS_ERR_EINT1_SHIFT 5 -#define MADERA_DSP6_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP5_BUS_ERR_EINT1 0x0010 #define MADERA_DSP5_BUS_ERR_EINT1_MASK 0x0010 #define MADERA_DSP5_BUS_ERR_EINT1_SHIFT 4 -#define MADERA_DSP5_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP4_BUS_ERR_EINT1 0x0008 #define MADERA_DSP4_BUS_ERR_EINT1_MASK 0x0008 #define MADERA_DSP4_BUS_ERR_EINT1_SHIFT 3 -#define MADERA_DSP4_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP3_BUS_ERR_EINT1 0x0004 #define MADERA_DSP3_BUS_ERR_EINT1_MASK 0x0004 #define MADERA_DSP3_BUS_ERR_EINT1_SHIFT 2 -#define MADERA_DSP3_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP2_BUS_ERR_EINT1 0x0002 #define MADERA_DSP2_BUS_ERR_EINT1_MASK 0x0002 #define MADERA_DSP2_BUS_ERR_EINT1_SHIFT 1 -#define MADERA_DSP2_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP1_BUS_ERR_EINT1 0x0001 #define MADERA_DSP1_BUS_ERR_EINT1_MASK 0x0001 #define MADERA_DSP1_BUS_ERR_EINT1_SHIFT 0 -#define MADERA_DSP1_BUS_ERR_EINT1_WIDTH 1 /* (0x1845) IRQ1_Mask_6 */ #define MADERA_IM_MICDET2_EINT1 0x0200 #define MADERA_IM_MICDET2_EINT1_MASK 0x0200 #define MADERA_IM_MICDET2_EINT1_SHIFT 9 -#define MADERA_IM_MICDET2_EINT1_WIDTH 1 #define MADERA_IM_MICDET1_EINT1 0x0100 #define MADERA_IM_MICDET1_EINT1_MASK 0x0100 #define MADERA_IM_MICDET1_EINT1_SHIFT 8 -#define MADERA_IM_MICDET1_EINT1_WIDTH 1 #define MADERA_IM_HPDET_EINT1 0x0001 #define MADERA_IM_HPDET_EINT1_MASK 0x0001 #define MADERA_IM_HPDET_EINT1_SHIFT 0 -#define MADERA_IM_HPDET_EINT1_WIDTH 1 /* (0x184E) IRQ1_Mask_15 */ #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1 0x0004 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_MASK 0x0004 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT 2 -#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 #define MADERA_IM_SPK_OVERHEAT_EINT1 0x0002 #define MADERA_IM_SPK_OVERHEAT_EINT1_MASK 0x0002 #define MADERA_IM_SPK_OVERHEAT_EINT1_SHIFT 1 -#define MADERA_IM_SPK_OVERHEAT_EINT1_WIDTH 1 #define MADERA_IM_SPK_SHUTDOWN_EINT1 0x0001 #define MADERA_IM_SPK_SHUTDOWN_EINT1_MASK 0x0001 #define MADERA_IM_SPK_SHUTDOWN_EINT1_SHIFT 0 -#define MADERA_IM_SPK_SHUTDOWN_EINT1_WIDTH 1 /* (0x1880) - IRQ1 Raw Status 1 */ #define MADERA_CTRLIF_ERR_STS1 0x1000 #define MADERA_CTRLIF_ERR_STS1_MASK 0x1000 #define MADERA_CTRLIF_ERR_STS1_SHIFT 12 -#define MADERA_CTRLIF_ERR_STS1_WIDTH 1 #define MADERA_SYSCLK_FAIL_STS1 0x0200 #define MADERA_SYSCLK_FAIL_STS1_MASK 0x0200 #define MADERA_SYSCLK_FAIL_STS1_SHIFT 9 -#define MADERA_SYSCLK_FAIL_STS1_WIDTH 1 #define MADERA_CLOCK_DETECT_STS1 0x0100 #define MADERA_CLOCK_DETECT_STS1_MASK 0x0100 #define MADERA_CLOCK_DETECT_STS1_SHIFT 8 -#define MADERA_CLOCK_DETECT_STS1_WIDTH 1 #define MADERA_BOOT_DONE_STS1 0x0080 #define MADERA_BOOT_DONE_STS1_MASK 0x0080 #define MADERA_BOOT_DONE_STS1_SHIFT 7 -#define MADERA_BOOT_DONE_STS1_WIDTH 1 /* (0x1881) - IRQ1 Raw Status 2 */ #define MADERA_FLL3_LOCK_STS1 0x0400 #define MADERA_FLL3_LOCK_STS1_MASK 0x0400 #define MADERA_FLL3_LOCK_STS1_SHIFT 10 -#define MADERA_FLL3_LOCK_STS1_WIDTH 1 #define MADERA_FLL2_LOCK_STS1 0x0200 #define MADERA_FLL2_LOCK_STS1_MASK 0x0200 #define MADERA_FLL2_LOCK_STS1_SHIFT 9 -#define MADERA_FLL2_LOCK_STS1_WIDTH 1 #define MADERA_FLL1_LOCK_STS1 0x0100 #define MADERA_FLL1_LOCK_STS1_MASK 0x0100 #define MADERA_FLL1_LOCK_STS1_SHIFT 8 -#define MADERA_FLL1_LOCK_STS1_WIDTH 1 /* (0x1886) - IRQ1 Raw Status 7 */ #define MADERA_MICD_CLAMP_FALL_STS1 0x0020 #define MADERA_MICD_CLAMP_FALL_STS1_MASK 0x0020 #define MADERA_MICD_CLAMP_FALL_STS1_SHIFT 5 -#define MADERA_MICD_CLAMP_FALL_STS1_WIDTH 1 #define MADERA_MICD_CLAMP_RISE_STS1 0x0010 #define MADERA_MICD_CLAMP_RISE_STS1_MASK 0x0010 #define MADERA_MICD_CLAMP_RISE_STS1_SHIFT 4 -#define MADERA_MICD_CLAMP_RISE_STS1_WIDTH 1 #define MADERA_JD2_FALL_STS1 0x0008 #define MADERA_JD2_FALL_STS1_MASK 0x0008 #define MADERA_JD2_FALL_STS1_SHIFT 3 -#define MADERA_JD2_FALL_STS1_WIDTH 1 #define MADERA_JD2_RISE_STS1 0x0004 #define MADERA_JD2_RISE_STS1_MASK 0x0004 #define MADERA_JD2_RISE_STS1_SHIFT 2 -#define MADERA_JD2_RISE_STS1_WIDTH 1 #define MADERA_JD1_FALL_STS1 0x0002 #define MADERA_JD1_FALL_STS1_MASK 0x0002 #define MADERA_JD1_FALL_STS1_SHIFT 1 -#define MADERA_JD1_FALL_STS1_WIDTH 1 #define MADERA_JD1_RISE_STS1 0x0001 #define MADERA_JD1_RISE_STS1_MASK 0x0001 #define MADERA_JD1_RISE_STS1_SHIFT 0 -#define MADERA_JD1_RISE_STS1_WIDTH 1 /* (0x188E) - IRQ1 Raw Status 15 */ #define MADERA_SPK_OVERHEAT_WARN_STS1 0x0004 #define MADERA_SPK_OVERHEAT_WARN_STS1_MASK 0x0004 #define MADERA_SPK_OVERHEAT_WARN_STS1_SHIFT 2 -#define MADERA_SPK_OVERHEAT_WARN_STS1_WIDTH 1 #define MADERA_SPK_OVERHEAT_STS1 0x0002 #define MADERA_SPK_OVERHEAT_STS1_MASK 0x0002 #define MADERA_SPK_OVERHEAT_STS1_SHIFT 1 -#define MADERA_SPK_OVERHEAT_STS1_WIDTH 1 #define MADERA_SPK_SHUTDOWN_STS1 0x0001 #define MADERA_SPK_SHUTDOWN_STS1_MASK 0x0001 #define MADERA_SPK_SHUTDOWN_STS1_SHIFT 0 -#define MADERA_SPK_SHUTDOWN_STS1_WIDTH 1 /* (0x1A06) Interrupt_Debounce_7 */ #define MADERA_MICD_CLAMP_DB 0x0010 #define MADERA_MICD_CLAMP_DB_MASK 0x0010 #define MADERA_MICD_CLAMP_DB_SHIFT 4 -#define MADERA_MICD_CLAMP_DB_WIDTH 1 #define MADERA_JD2_DB 0x0004 #define MADERA_JD2_DB_MASK 0x0004 #define MADERA_JD2_DB_SHIFT 2 -#define MADERA_JD2_DB_WIDTH 1 #define MADERA_JD1_DB 0x0001 #define MADERA_JD1_DB_MASK 0x0001 #define MADERA_JD1_DB_SHIFT 0 -#define MADERA_JD1_DB_WIDTH 1 /* (0x1A0E) Interrupt_Debounce_15 */ #define MADERA_SPK_OVERHEAT_WARN_DB 0x0004 #define MADERA_SPK_OVERHEAT_WARN_DB_MASK 0x0004 #define MADERA_SPK_OVERHEAT_WARN_DB_SHIFT 2 -#define MADERA_SPK_OVERHEAT_WARN_DB_WIDTH 1 #define MADERA_SPK_OVERHEAT_DB 0x0002 #define MADERA_SPK_OVERHEAT_DB_MASK 0x0002 #define MADERA_SPK_OVERHEAT_DB_SHIFT 1 -#define MADERA_SPK_OVERHEAT_DB_WIDTH 1 /* (0x1A80) IRQ1_CTRL */ #define MADERA_IM_IRQ1 0x0800 #define MADERA_IM_IRQ1_MASK 0x0800 #define MADERA_IM_IRQ1_SHIFT 11 -#define MADERA_IM_IRQ1_WIDTH 1 #define MADERA_IRQ_POL 0x0400 #define MADERA_IRQ_POL_MASK 0x0400 #define MADERA_IRQ_POL_SHIFT 10 -#define MADERA_IRQ_POL_WIDTH 1 /* (0x20004) OTP_HPDET_Cal_1 */ #define MADERA_OTP_HPDET_CALIB_OFFSET_11 0xFF000000 #define MADERA_OTP_HPDET_CALIB_OFFSET_11_MASK 0xFF000000 #define MADERA_OTP_HPDET_CALIB_OFFSET_11_SHIFT 24 -#define MADERA_OTP_HPDET_CALIB_OFFSET_11_WIDTH 8 #define MADERA_OTP_HPDET_CALIB_OFFSET_10 0x00FF0000 #define MADERA_OTP_HPDET_CALIB_OFFSET_10_MASK 0x00FF0000 #define MADERA_OTP_HPDET_CALIB_OFFSET_10_SHIFT 16 -#define MADERA_OTP_HPDET_CALIB_OFFSET_10_WIDTH 8 #define MADERA_OTP_HPDET_CALIB_OFFSET_01 0x0000FF00 #define MADERA_OTP_HPDET_CALIB_OFFSET_01_MASK 0x0000FF00 #define MADERA_OTP_HPDET_CALIB_OFFSET_01_SHIFT 8 -#define MADERA_OTP_HPDET_CALIB_OFFSET_01_WIDTH 8 #define MADERA_OTP_HPDET_CALIB_OFFSET_00 0x000000FF #define MADERA_OTP_HPDET_CALIB_OFFSET_00_MASK 0x000000FF #define MADERA_OTP_HPDET_CALIB_OFFSET_00_SHIFT 0 -#define MADERA_OTP_HPDET_CALIB_OFFSET_00_WIDTH 8 /* (0x20006) OTP_HPDET_Cal_2 */ #define MADERA_OTP_HPDET_GRADIENT_1X 0x0000FF00 #define MADERA_OTP_HPDET_GRADIENT_1X_MASK 0x0000FF00 #define MADERA_OTP_HPDET_GRADIENT_1X_SHIFT 8 -#define MADERA_OTP_HPDET_GRADIENT_1X_WIDTH 8 #define MADERA_OTP_HPDET_GRADIENT_0X 0x000000FF #define MADERA_OTP_HPDET_GRADIENT_0X_MASK 0x000000FF #define MADERA_OTP_HPDET_GRADIENT_0X_SHIFT 0 -#define MADERA_OTP_HPDET_GRADIENT_0X_WIDTH 8 #endif -- cgit v1.2.3 From 28faad777c2d1480dfdda6697e58c06cf9011ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 27 Sep 2020 01:59:17 +0200 Subject: mfd: tps65910: Clean up after switching to regmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove wrappers around regmap calls to remove now-useless indirection. Signed-off-by: Michał Mirosław Signed-off-by: Lee Jones --- include/linux/mfd/tps65910.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index ce4b9e743f7c..f7398d982f23 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -913,39 +913,4 @@ static inline int tps65910_chip_id(struct tps65910 *tps65910) return tps65910->id; } -static inline int tps65910_reg_read(struct tps65910 *tps65910, u8 reg, - unsigned int *val) -{ - return regmap_read(tps65910->regmap, reg, val); -} - -static inline int tps65910_reg_write(struct tps65910 *tps65910, u8 reg, - unsigned int val) -{ - return regmap_write(tps65910->regmap, reg, val); -} - -static inline int tps65910_reg_set_bits(struct tps65910 *tps65910, u8 reg, - u8 mask) -{ - return regmap_update_bits(tps65910->regmap, reg, mask, mask); -} - -static inline int tps65910_reg_clear_bits(struct tps65910 *tps65910, u8 reg, - u8 mask) -{ - return regmap_update_bits(tps65910->regmap, reg, mask, 0); -} - -static inline int tps65910_reg_update_bits(struct tps65910 *tps65910, u8 reg, - u8 mask, u8 val) -{ - return regmap_update_bits(tps65910->regmap, reg, mask, val); -} - -static inline int tps65910_irq_get_virq(struct tps65910 *tps65910, int irq) -{ - return regmap_irq_get_virq(tps65910->irq_data, irq); -} - #endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From 9f5b98f3f4149a10c315ddb4d0bed033f398e8ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 27 Sep 2020 01:59:18 +0200 Subject: mfd: tps65910: Remove unused pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Client pointers in tps65910 data are not used in the drivers. Remove those fields. Signed-off-by: Michał Mirosław Signed-off-by: Lee Jones --- include/linux/mfd/tps65910.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index f7398d982f23..701925db75b3 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -890,11 +890,6 @@ struct tps65910 { struct regmap *regmap; unsigned long id; - /* Client devices */ - struct tps65910_pmic *pmic; - struct tps65910_rtc *rtc; - struct tps65910_power *power; - /* Device node parsed board data */ struct tps65910_board *of_plat_data; -- cgit v1.2.3 From f594d01bb4aff35dc582f5418e6823f79e28834b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 27 Oct 2020 09:41:32 +0000 Subject: mfd: madera: Add special errata reset handling for cs47l15 An errata exists for cs47l15 where the reset must be handled differently and removed before DCVDD is applied. A soft reset is used for situations where a reset is required to reset state. This does however, make this part unsuitable for DCVDD supplies with a rise time greater than 2mS. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/madera/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h index ad2c138105d4..03a8a788424a 100644 --- a/include/linux/mfd/madera/core.h +++ b/include/linux/mfd/madera/core.h @@ -186,6 +186,7 @@ struct madera { struct regulator_bulk_data core_supplies[MADERA_MAX_CORE_SUPPLIES]; struct regulator *dcvdd; bool internal_dcvdd; + bool reset_errata; struct madera_pdata pdata; -- cgit v1.2.3 From 4556fe8f16e0225f5df7a57d123e0d55717bf2aa Mon Sep 17 00:00:00 2001 From: Michael Srba Date: Tue, 10 Nov 2020 14:00:47 +0100 Subject: mfd: rt5033: Fix errorneous defines Fix regulators on rt5033 by converting some values to bitmasks which were errorneously not defined as such in the header file. Cc: Beomho Seo Fixes: 0b271258544b ("mfd: rt5033: Add Richtek RT5033 driver core.") Signed-off-by: Michael Srba Signed-off-by: Lee Jones --- include/linux/mfd/rt5033-private.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h index f812105c538c..2d1895c3efbf 100644 --- a/include/linux/mfd/rt5033-private.h +++ b/include/linux/mfd/rt5033-private.h @@ -91,14 +91,14 @@ enum rt5033_reg { #define RT5033_RT_HZ_MASK 0x01 /* RT5033 control register */ -#define RT5033_CTRL_FCCM_BUCK_MASK 0x00 -#define RT5033_CTRL_BUCKOMS_MASK 0x01 -#define RT5033_CTRL_LDOOMS_MASK 0x02 -#define RT5033_CTRL_SLDOOMS_MASK 0x03 -#define RT5033_CTRL_EN_BUCK_MASK 0x04 -#define RT5033_CTRL_EN_LDO_MASK 0x05 -#define RT5033_CTRL_EN_SAFE_LDO_MASK 0x06 -#define RT5033_CTRL_LDO_SLEEP_MASK 0x07 +#define RT5033_CTRL_FCCM_BUCK_MASK BIT(0) +#define RT5033_CTRL_BUCKOMS_MASK BIT(1) +#define RT5033_CTRL_LDOOMS_MASK BIT(2) +#define RT5033_CTRL_SLDOOMS_MASK BIT(3) +#define RT5033_CTRL_EN_BUCK_MASK BIT(4) +#define RT5033_CTRL_EN_LDO_MASK BIT(5) +#define RT5033_CTRL_EN_SAFE_LDO_MASK BIT(6) +#define RT5033_CTRL_LDO_SLEEP_MASK BIT(7) /* RT5033 BUCK control register */ #define RT5033_BUCK_CTRL_MASK 0x1f @@ -247,11 +247,11 @@ enum rt5033_fuel_reg { #define RT5033_FUEL_BAT_PRESENT 0x02 /* RT5033 PMIC interrupts */ -#define RT5033_PMIC_IRQ_BUCKOCP 2 -#define RT5033_PMIC_IRQ_BUCKLV 3 -#define RT5033_PMIC_IRQ_SAFELDOLV 4 -#define RT5033_PMIC_IRQ_LDOLV 5 -#define RT5033_PMIC_IRQ_OT 6 -#define RT5033_PMIC_IRQ_VDDA_UV 7 +#define RT5033_PMIC_IRQ_BUCKOCP BIT(2) +#define RT5033_PMIC_IRQ_BUCKLV BIT(3) +#define RT5033_PMIC_IRQ_SAFELDOLV BIT(4) +#define RT5033_PMIC_IRQ_LDOLV BIT(5) +#define RT5033_PMIC_IRQ_OT BIT(6) +#define RT5033_PMIC_IRQ_VDDA_UV BIT(7) #endif /* __RT5033_PRIVATE_H__ */ -- cgit v1.2.3 From 295992fb815e791d14b18ef7cdbbaf1a76211a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Sep 2020 15:09:33 +0200 Subject: mm: introduce vma_set_file function v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new vma_set_file() function to allow changing vma->vm_file with the necessary refcount dance. v2: add more users of this. v3: add missing EXPORT_SYMBOL, rebase on mmap cleanup, add comments why we drop the reference on two occasions. v4: make it clear that changing an anonymous vma is illegal. v5: move vma_set_file to mm/util.c Signed-off-by: Christian König Reviewed-by: Daniel Vetter (v2) Reviewed-by: Jason Gunthorpe Acked-by: Andrew Morton Link: https://patchwork.freedesktop.org/patch/399360/ --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..47bff16c182d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2719,6 +2719,8 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif +void vma_set_file(struct vm_area_struct *vma, struct file *file); + #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 179a9cf79212bb3b96fb69a314583189cd863c5b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 17 Nov 2020 16:16:34 +0100 Subject: context_tracking: Don't implement exception_enter/exit() on CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK The typical steps with context tracking are: 1) Task runs in userspace 2) Task enters the kernel (syscall/exception/IRQ) 3) Task switches from context tracking state CONTEXT_USER to CONTEXT_KERNEL (user_exit()) 4) Task does stuff in kernel 5) Task switches from context tracking state CONTEXT_KERNEL to CONTEXT_USER (user_enter()) 6) Task exits the kernel If an exception fires between 5) and 6), the pt_regs and the context tracking disagree on the context of the faulted/trapped instruction. CONTEXT_KERNEL must be set before the exception handler, that's unconditional for those handlers that want to be able to call into schedule(), but CONTEXT_USER must be restored when the exception exits whereas pt_regs tells that we are resuming to kernel space. This can't be fixed with storing the context tracking state in a per-cpu or per-task variable since another exception may fire onto the current one and overwrite the saved state. Also the task can schedule. So it has to be stored in a per task stack. This is how exception_enter()/exception_exit() paper over the problem: 5) Task switches from context tracking state CONTEXT_KERNEL to CONTEXT_USER (user_enter()) 5.1) Exception fires 5.2) prev_state = exception_enter() // save CONTEXT_USER to prev_state // and set CONTEXT_KERNEL 5.3) Exception handler 5.4) exception_enter(prev_state) // restore CONTEXT_USER 5.5) Exception resumes 6) Task exits the kernel The condition to live without exception_enter()/exception_exit() is to forbid exceptions and IRQs between 2) and 3) and between 5) and 6), or if any is allowed to trigger, it won't call into context tracking, eg: NMIs, and it won't schedule. These requirements are met by architectures supporting CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK and those can therefore afford not to implement this hack. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117151637.259084-3-frederic@kernel.org --- include/linux/context_tracking.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d53cd331c4dd..bceb06498521 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -51,7 +51,8 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (!context_tracking_enabled()) + if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) || + !context_tracking_enabled()) return 0; prev_ctx = this_cpu_read(context_tracking.state); @@ -63,7 +64,8 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (context_tracking_enabled()) { + if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) && + context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) context_tracking_enter(prev_ctx); } -- cgit v1.2.3 From 31f6a8c0a471be7d7d05c93eac50fcb729e79b9d Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 27 Oct 2020 18:07:11 +0000 Subject: sched/topology,schedutil: Wrap sched domains rebuild Add the rebuild_sched_domains_energy() function to wrap the functionality that rebuilds the scheduling domains if any of the Energy Aware Scheduling (EAS) initialisation conditions change. This functionality is used when schedutil is added or removed or when EAS is enabled or disabled through the sched_energy_aware sysctl. Therefore, create a single function that is used in both these cases and that can be later reused. Signed-off-by: Ionela Voinescu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Quentin Perret Acked-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/20201027180713.7642-2-ionela.voinescu@arm.com --- include/linux/sched/topology.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 9ef7bf686a9f..8f0f778b7c91 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -225,6 +225,14 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +extern void rebuild_sched_domains_energy(void); +#else +static inline void rebuild_sched_domains_energy(void) +{ +} +#endif + #ifndef arch_scale_cpu_capacity /** * arch_scale_cpu_capacity - get the capacity scale factor of a given CPU. -- cgit v1.2.3 From 25ece30561d247b2931b0d11d92e9c976a668771 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 9 Nov 2020 17:34:05 +0100 Subject: rtc: nvmem: remove nvram ABI The nvram sysfs attributes have been deprecated at least since v4.13, more than 3 years ago and nobody ever complained about the deprecation warning. Remove the sysfs attributes now. [Bartosz: remove the declaration of rtc_nvmem_unregister()] Signed-off-by: Alexandre Belloni Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20201109163409.24301-5-brgl@bgdev.pl --- include/linux/rtc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 22d1575e4991..0983ab9faffb 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -120,10 +120,6 @@ struct rtc_device { bool registered; - /* Old ABI support */ - bool nvram_old_abi; - struct bin_attribute *nvram; - time64_t range_min; timeu64_t range_max; time64_t start_secs; @@ -250,14 +246,12 @@ extern int rtc_hctosys_ret; #ifdef CONFIG_RTC_NVMEM int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config); -void rtc_nvmem_unregister(struct rtc_device *rtc); #else static inline int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { return 0; } -static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {} #endif #ifdef CONFIG_RTC_INTF_SYSFS -- cgit v1.2.3 From 3a905c2d9544a418953d6c18668f0f853fbd9be9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:06 +0100 Subject: rtc: add devm_ prefix to rtc_nvmem_register() rtc_nvmem_register() is a managed interface. It doesn't require any release function to be called at driver detach. To avoid confusing driver authors, let's rename it to devm_rtc_nvmem_register() and add it to the list of managed interfaces in Documentation/. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-6-brgl@bgdev.pl --- include/linux/rtc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 0983ab9faffb..cbca651d8ca4 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -244,11 +244,11 @@ extern int rtc_hctosys_ret; #endif #ifdef CONFIG_RTC_NVMEM -int rtc_nvmem_register(struct rtc_device *rtc, - struct nvmem_config *nvmem_config); +int devm_rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config); #else -static inline int rtc_nvmem_register(struct rtc_device *rtc, - struct nvmem_config *nvmem_config) +static inline int devm_rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config) { return 0; } -- cgit v1.2.3 From fdcfd854333be5b30377dc5daa9cd0fa1643a979 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:08 +0100 Subject: rtc: rework rtc_register_device() resource management rtc_register_device() is a managed interface but it doesn't use devres by itself - instead it marks an rtc_device as "registered" and the devres callback for devm_rtc_allocate_device() takes care of resource release. This doesn't correspond with the design behind devres where managed structures should not be aware of being managed. The correct solution here is to register a separate devres callback for unregistering the device. While at it: rename rtc_register_device() to devm_rtc_register_device() and add it to the list of managed interfaces in devres.rst. This way we can avoid any potential confusion of driver developers who may expect there to exist a corresponding unregister function. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-8-brgl@bgdev.pl --- include/linux/rtc.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index cbca651d8ca4..55e7beed066c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -118,8 +118,6 @@ struct rtc_device { */ long set_offset_nsec; - bool registered; - time64_t range_min; timeu64_t range_max; time64_t start_secs; @@ -157,7 +155,7 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev, const struct rtc_class_ops *ops, struct module *owner); struct rtc_device *devm_rtc_allocate_device(struct device *dev); -int __rtc_register_device(struct module *owner, struct rtc_device *rtc); +int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); @@ -234,8 +232,8 @@ static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, return false; } -#define rtc_register_device(device) \ - __rtc_register_device(THIS_MODULE, device) +#define devm_rtc_register_device(device) \ + __devm_rtc_register_device(THIS_MODULE, device) #ifdef CONFIG_RTC_HCTOSYS_DEVICE extern int rtc_hctosys_ret; -- cgit v1.2.3 From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:40 +0900 Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4 Align the bootconfig applied initrd image size to 4. To fill the gap, the bootconfig command uses null characters in between the bootconfig data and the footer. This will expands the footer size but don't change the checksum. Thus the block image of the initrd file with bootconfig is as follows. [initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"] Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 9903088891fa..2696eb0fc149 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -12,6 +12,9 @@ #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 +#define BOOTCONFIG_ALIGN_SHIFT 2 +#define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) +#define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) /* XBC tree node */ struct xbc_node { -- cgit v1.2.3 From bc1c99a5971aa7571e8b9731c28fa32abe12cab8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 17 Nov 2020 20:49:52 +0800 Subject: EDAC: Add DDR5 new memory type Add a new entry to 'enum mem_type' and a new string to 'edac_mem_types[]' for DDR5 new memory type. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck --- include/linux/edac.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 8f63245f7f7c..e64b73b556eb 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -181,6 +181,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_LPDDR4: Low-Power DDR4 memory. + * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. */ @@ -208,6 +209,7 @@ enum mem_type { MEM_RDDR4, MEM_LRDDR4, MEM_LPDDR4, + MEM_DDR5, MEM_NVDIMM, MEM_WIO2, }; @@ -234,6 +236,7 @@ enum mem_type { #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) -- cgit v1.2.3 From dfe564045c653d9e6969ccca57a8a04771d333f7 Mon Sep 17 00:00:00 2001 From: chao Date: Sun, 30 Aug 2020 23:41:17 -0700 Subject: rcu: Panic after fixed number of stalls Some stalls are transient, so that system fully recovers. This commit therefore allows users to configure the number of stalls that must happen in order to trigger kernel panic. Signed-off-by: chao Signed-off-by: Paul E. McKenney --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f05e9128201..4b5fd3da5fe8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -536,6 +536,7 @@ extern int panic_on_warn; extern unsigned long panic_on_taint; extern bool panic_on_taint_nousertaint; extern int sysctl_panic_on_rcu_stall; +extern int sysctl_max_rcu_stall_to_panic; extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; -- cgit v1.2.3 From 1eafe075bf9cb4db575be4ddf1b1c8256758714a Mon Sep 17 00:00:00 2001 From: Asif Rasheed Date: Sun, 20 Sep 2020 17:31:54 +0400 Subject: list.h: Update comment to explicitly note circular lists The students in the Operating System Lecture Section at the American University of Sharjah were confused by the header comment in include/linux/list.h, which says "Simple doubly linked list implementation". This comment means "simple" as in "not complex", but "simple" is often used in this context to mean "not circular". This commit therefore avoids this ambiguity by explicitly calling out "circular". Signed-off-by: Asif Rasheed Signed-off-by: Paul E. McKenney --- include/linux/list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index a18c87b63376..89bdc92e75c3 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -9,7 +9,7 @@ #include /* - * Simple doubly linked list implementation. + * Circular doubly linked list implementation. * * Some of the internal functions ("__xxx") are useful when * manipulating whole lists rather than single entries, as -- cgit v1.2.3 From 2bf31d94423c8ae3ff58e38a115b177df6940399 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:08 +0100 Subject: jbd2: fix kernel-doc markups Kernel-doc markup should use this format: identifier - description They should not have any type before that, as otherwise the parser won't do the right thing. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Reviewed-by: Jan Kara Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1c49fd62ff2e..578ff196b3ce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { -- cgit v1.2.3 From a24d22b225ce158651378869a6b88105c4bdb887 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Nov 2020 21:20:21 -0800 Subject: crypto: sha - split sha.h into sha1.h and sha2.h Currently contains declarations for both SHA-1 and SHA-2, and contains declarations for SHA-3. This organization is inconsistent, but more importantly SHA-1 is no longer considered to be cryptographically secure. So to the extent possible, SHA-1 shouldn't be grouped together with any of the other SHA versions, and usage of it should be phased out. Therefore, split into two headers and , and make everyone explicitly specify whether they want the declarations for SHA-1, SHA-2, or both. This avoids making the SHA-1 declarations visible to files that don't want anything to do with SHA-1. It also prepares for potentially moving sha1.h into a new insecure/ or dangerous/ directory. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Acked-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- include/linux/ccp.h | 3 ++- include/linux/filter.h | 2 +- include/linux/purgatory.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index a5dfbaf2470d..868924dec5a1 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include struct ccp_device; struct ccp_cmd; diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..6c00140538b9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h index b950e961cfa8..d7dc1559427f 100644 --- a/include/linux/purgatory.h +++ b/include/linux/purgatory.h @@ -3,7 +3,7 @@ #define _LINUX_PURGATORY_H #include -#include +#include #include struct kexec_sha_region { -- cgit v1.2.3 From ded5ed04d85e299770dcb7e82c2127b8054a00c8 Mon Sep 17 00:00:00 2001 From: Souradeep Chowdhury Date: Wed, 30 Sep 2020 13:44:13 +0530 Subject: soc: qcom: llcc: Add configuration data for SM8150 Add LLCC configuration data for SM8150 SoC which controls LLCC behaviour. Signed-off-by: Souradeep Chowdhury Link: https://lore.kernel.org/r/957e3ae50c75720ef6227529d5ce3d4b457802e9.1601452132.git.schowdhu@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 90b864655822..3db6797ba6ff 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -16,6 +16,7 @@ #define LLCC_AUDIO 6 #define LLCC_MDMHPGRW 7 #define LLCC_MDM 8 +#define LLCC_MODHW 9 #define LLCC_CMPT 10 #define LLCC_GPUHTW 11 #define LLCC_GPU 12 @@ -26,6 +27,11 @@ #define LLCC_MDMHPFX 20 #define LLCC_MDMPNG 21 #define LLCC_AUDHW 22 +#define LLCC_NPU 23 +#define LLCC_WLHW 24 +#define LLCC_MODPE 29 +#define LLCC_APTCM 30 +#define LLCC_WRCACHE 31 /** * llcc_slice_desc - Cache slice descriptor -- cgit v1.2.3 From 69d98969a0540039fc04e0f22bbe9f41b0a13d66 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 11:18:46 +0100 Subject: can: rename get_can_dlc() macro with can_cc_dlc2len() The get_can_dlc() macro is used to ensure the payload length information of the Classical CAN frame to be max 8 bytes (the CAN_MAX_DLEN). Rename the macro and use the correct constant in preparation of the len/dlc cleanup for Classical CAN frames. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110101852.1973-3-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 41ff31795320..9bc84c6978ec 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -98,14 +98,14 @@ static inline unsigned int can_bit_time(const struct can_bittiming *bt) } /* - * get_can_dlc(value) - helper macro to cast a given data length code (dlc) - * to u8 and ensure the dlc value to be max. 8 bytes. + * can_cc_dlc2len(value) - convert a given data length code (dlc) of a + * Classical CAN frame into a valid data length of max. 8 bytes. * * To be used in the CAN netdriver receive path to ensure conformance with * ISO 11898-1 Chapter 8.4.2.3 (DLC field) */ -#define get_can_dlc(i) (min_t(u8, (i), CAN_MAX_DLC)) -#define get_canfd_dlc(i) (min_t(u8, (i), CANFD_MAX_DLC)) +#define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) +#define get_canfd_dlc(dlc) (min_t(u8, (dlc), CANFD_MAX_DLC)) /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, -- cgit v1.2.3 From cd1124e76d740327be5d8f9ce3785ce1119daf4b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 11:18:47 +0100 Subject: can: remove obsolete get_canfd_dlc() macro The macro was always used together with can_dlc2len() which sanitizes the given dlc value on its own. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110101852.1973-4-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 - include/linux/can/dev/peak_canfd.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 9bc84c6978ec..802606e36b58 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -105,7 +105,6 @@ static inline unsigned int can_bit_time(const struct can_bittiming *bt) * ISO 11898-1 Chapter 8.4.2.3 (DLC field) */ #define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) -#define get_canfd_dlc(dlc) (min_t(u8, (dlc), CANFD_MAX_DLC)) /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index 5fd627e9da19..f38772fd0c07 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -282,7 +282,7 @@ static inline int pucan_msg_get_channel(const struct pucan_rx_msg *msg) } /* return the dlc value from any received message channel_dlc field */ -static inline int pucan_msg_get_dlc(const struct pucan_rx_msg *msg) +static inline u8 pucan_msg_get_dlc(const struct pucan_rx_msg *msg) { return msg->channel_dlc >> 4; } -- cgit v1.2.3 From 964db79d6c186cc2ecc6ae46f98eed7e0ea8cf71 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 19 Nov 2020 18:53:55 +0100 Subject: of/address: Introduce of_dma_get_max_cpu_address() Introduce of_dma_get_max_cpu_address(), which provides the highest CPU physical address addressable by all DMA masters in the system. It's specially useful for setting memory zones sizes at early boot time. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201119175400.9995-4-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- include/linux/of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 5d51891cbf1a..9ed5b8532c30 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); +phys_addr_t of_dma_get_max_cpu_address(struct device_node *np); + #else /* CONFIG_OF */ static inline void of_core_init(void) @@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id, return -EINVAL; } +static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np) +{ + return PHYS_ADDR_MAX; +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ -- cgit v1.2.3 From 2b8652936f0ca9ca2e6c984ae76c7bfcda1b3f22 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Nov 2020 18:53:58 +0100 Subject: arm64: mm: Set ZONE_DMA size based on early IORT scan We recently introduced a 1 GB sized ZONE_DMA to cater for platforms incorporating masters that can address less than 32 bits of DMA, in particular the Raspberry Pi 4, which has 4 or 8 GB of DRAM, but has peripherals that can only address up to 1 GB (and its PCIe host bridge can only access the bottom 3 GB) Instructing the DMA layer about these limitations is straight-forward, even though we had to fix some issues regarding memory limits set in the IORT for named components, and regarding the handling of ACPI _DMA methods. However, the DMA layer also needs to be able to allocate memory that is guaranteed to meet those DMA constraints, for bounce buffering as well as allocating the backing for consistent mappings. This is why the 1 GB ZONE_DMA was introduced recently. Unfortunately, it turns out the having a 1 GB ZONE_DMA as well as a ZONE_DMA32 causes problems with kdump, and potentially in other places where allocations cannot cross zone boundaries. Therefore, we should avoid having two separate DMA zones when possible. So let's do an early scan of the IORT, and only create the ZONE_DMA if we encounter any devices that need it. This puts the burden on the firmware to describe such limitations in the IORT, which may be redundant (and less precise) if _DMA methods are also being provided. However, it should be noted that this situation is highly unusual for arm64 ACPI machines. Also, the DMA subsystem still gives precedence to the _DMA method if implemented, and so we will not lose the ability to perform streaming DMA outside the ZONE_DMA if the _DMA method permits it. [nsaenz: unified implementation with DT's counterpart] Signed-off-by: Ard Biesheuvel Signed-off-by: Nicolas Saenz Julienne Tested-by: Jeremy Linton Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Cc: Jeremy Linton Cc: Lorenzo Pieralisi Cc: Nicolas Saenz Julienne Cc: Rob Herring Cc: Christoph Hellwig Cc: Robin Murphy Cc: Hanjun Guo Cc: Sudeep Holla Cc: Anshuman Khandual Link: https://lore.kernel.org/r/20201119175400.9995-7-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- include/linux/acpi_iort.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 20a32120bb88..1a12baa58e40 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); const struct iommu_ops *iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } static inline u32 iort_msi_map_id(struct device *dev, u32 id) @@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id( static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } + +static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) +{ return PHYS_ADDR_MAX; } #endif #endif /* __ACPI_IORT_H__ */ -- cgit v1.2.3 From 04435217f96869ac3a8f055ff68c5237a60bcd7e Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 19 Nov 2020 18:53:59 +0100 Subject: mm: Remove examples from enum zone_type comment We can't really list every setup in common code. On top of that they are unlikely to stay true for long as things change in the arch trees independently of this comment. Suggested-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20201119175400.9995-8-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- include/linux/mmzone.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fb3bf696c05e..9d0c454d23cd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -354,26 +354,6 @@ enum zone_type { * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit * platforms may need both zones as they support peripherals with * different DMA addressing limitations. - * - * Some examples: - * - * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the - * rest of the lower 4G. - * - * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on - * the specific device. - * - * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the - * lower 4G. - * - * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary - * depending on the specific device. - * - * - s390 uses ZONE_DMA fixed to the lower 2G. - * - * - ia64 and riscv only use ZONE_DMA32. - * - * - parisc uses neither. */ #ifdef CONFIG_ZONE_DMA ZONE_DMA, -- cgit v1.2.3 From 607a4672b458b12674b96724e2f9bd42a5e928c6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 20 Nov 2020 10:55:17 +0000 Subject: firmware: arm_scmi: Add full list of sensor type enumeration SCMI v2.0 provides a big list of sensor type enumeration from the sensorUnits enumeration table of Distributed Management Task Force(DMTF) specification number DSP 0248 (Platform Level Data Model for Platform Monitoring and Control Specification). It is however not an exact replica of the sensorUnits enumeration table. Let us just update the table as per SCMI v2.0 specification. Link: https://lore.kernel.org/r/20201119174906.43862-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 81 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9cd312a1ff92..13d75956aa91 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -163,11 +163,92 @@ struct scmi_sensor_info { */ enum scmi_sensor_class { NONE = 0x0, + UNSPEC = 0x1, TEMPERATURE_C = 0x2, + TEMPERATURE_F = 0x3, + TEMPERATURE_K = 0x4, VOLTAGE = 0x5, CURRENT = 0x6, POWER = 0x7, ENERGY = 0x8, + CHARGE = 0x9, + VOLTAMPERE = 0xA, + NITS = 0xB, + LUMENS = 0xC, + LUX = 0xD, + CANDELAS = 0xE, + KPA = 0xF, + PSI = 0x10, + NEWTON = 0x11, + CFM = 0x12, + RPM = 0x13, + HERTZ = 0x14, + SECS = 0x15, + MINS = 0x16, + HOURS = 0x17, + DAYS = 0x18, + WEEKS = 0x19, + MILS = 0x1A, + INCHES = 0x1B, + FEET = 0x1C, + CUBIC_INCHES = 0x1D, + CUBIC_FEET = 0x1E, + METERS = 0x1F, + CUBIC_CM = 0x20, + CUBIC_METERS = 0x21, + LITERS = 0x22, + FLUID_OUNCES = 0x23, + RADIANS = 0x24, + STERADIANS = 0x25, + REVOLUTIONS = 0x26, + CYCLES = 0x27, + GRAVITIES = 0x28, + OUNCES = 0x29, + POUNDS = 0x2A, + FOOT_POUNDS = 0x2B, + OUNCE_INCHES = 0x2C, + GAUSS = 0x2D, + GILBERTS = 0x2E, + HENRIES = 0x2F, + FARADS = 0x30, + OHMS = 0x31, + SIEMENS = 0x32, + MOLES = 0x33, + BECQUERELS = 0x34, + PPM = 0x35, + DECIBELS = 0x36, + DBA = 0x37, + DBC = 0x38, + GRAYS = 0x39, + SIEVERTS = 0x3A, + COLOR_TEMP_K = 0x3B, + BITS = 0x3C, + BYTES = 0x3D, + WORDS = 0x3E, + DWORDS = 0x3F, + QWORDS = 0x40, + PERCENTAGE = 0x41, + PASCALS = 0x42, + COUNTS = 0x43, + GRAMS = 0x44, + NEWTON_METERS = 0x45, + HITS = 0x46, + MISSES = 0x47, + RETRIES = 0x48, + OVERRUNS = 0x49, + UNDERRUNS = 0x4A, + COLLISIONS = 0x4B, + PACKETS = 0x4C, + MESSAGES = 0x4D, + CHARS = 0x4E, + ERRORS = 0x4F, + CORRECTED_ERRS = 0x50, + UNCORRECTABLE_ERRS = 0x51, + SQ_MILS = 0x52, + SQ_INCHES = 0x53, + SQ_FEET = 0x54, + SQ_CM = 0x55, + SQ_METERS = 0x56, }; /** -- cgit v1.2.3 From 1fe00b8b4276ddf335216f884cb719edbea129e1 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:02 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensors descriptors extensions Add support for new SCMI v3.0 Sensors extensions related to new sensors' features, like multiple axis and update intervals, while keeping compatibility with SCMI v2.0 features. While at that, refactor and simplify all the internal helpers macros and move struct scmi_sensor_info to use only non-fixed-size typing. Link: https://lore.kernel.org/r/20201119174906.43862-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 139 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 13d75956aa91..0792b0be25a3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -8,6 +8,7 @@ #ifndef _LINUX_SCMI_PROTOCOL_H #define _LINUX_SCMI_PROTOCOL_H +#include #include #include #include @@ -148,13 +149,135 @@ struct scmi_power_ops { u32 *state); }; +/** + * scmi_range_attrs - specifies a sensor or axis values' range + * @min_range: The minimum value which can be represented by the sensor/axis. + * @max_range: The maximum value which can be represented by the sensor/axis. + */ +struct scmi_range_attrs { + long long min_range; + long long max_range; +}; + +/** + * scmi_sensor_axis_info - describes one sensor axes + * @id: The axes ID. + * @type: Axes type. Chosen amongst one of @enum scmi_sensor_class. + * @scale: Power-of-10 multiplier applied to the axis unit. + * @name: NULL-terminated string representing axes name as advertised by + * SCMI platform. + * @extended_attrs: Flag to indicate the presence of additional extended + * attributes for this axes. + * @resolution: Extended attribute representing the resolution of the axes. + * Set to 0 if not reported by this axes. + * @exponent: Extended attribute representing the power-of-10 multiplier that + * is applied to the resolution field. Set to 0 if not reported by + * this axes. + * @attrs: Extended attributes representing minimum and maximum values + * measurable by this axes. Set to 0 if not reported by this sensor. + */ +struct scmi_sensor_axis_info { + unsigned int id; + unsigned int type; + int scale; + char name[SCMI_MAX_STR_SIZE]; + bool extended_attrs; + unsigned int resolution; + int exponent; + struct scmi_range_attrs attrs; +}; + +/** + * scmi_sensor_intervals_info - describes number and type of available update + * intervals + * @segmented: Flag for segmented intervals' representation. When True there + * will be exactly 3 intervals in @desc, with each entry + * representing a member of a segment in this order: + * {lowest update interval, highest update interval, step size} + * @count: Number of intervals described in @desc. + * @desc: Array of @count interval descriptor bitmask represented as detailed in + * the SCMI specification: it can be accessed using the accompanying + * macros. + * @prealloc_pool: A minimal preallocated pool of desc entries used to avoid + * lesser-than-64-bytes dynamic allocation for small @count + * values. + */ +struct scmi_sensor_intervals_info { + bool segmented; + unsigned int count; +#define SCMI_SENS_INTVL_SEGMENT_LOW 0 +#define SCMI_SENS_INTVL_SEGMENT_HIGH 1 +#define SCMI_SENS_INTVL_SEGMENT_STEP 2 + unsigned int *desc; +#define SCMI_SENS_INTVL_GET_SECS(x) FIELD_GET(GENMASK(20, 5), (x)) +#define SCMI_SENS_INTVL_GET_EXP(x) \ + ({ \ + int __signed_exp = FIELD_GET(GENMASK(4, 0), (x)); \ + \ + if (__signed_exp & BIT(4)) \ + __signed_exp |= GENMASK(31, 5); \ + __signed_exp; \ + }) +#define SCMI_MAX_PREALLOC_POOL 16 + unsigned int prealloc_pool[SCMI_MAX_PREALLOC_POOL]; +}; + +/** + * struct scmi_sensor_info - represents information related to one of the + * available sensors. + * @id: Sensor ID. + * @type: Sensor type. Chosen amongst one of @enum scmi_sensor_class. + * @scale: Power-of-10 multiplier applied to the sensor unit. + * @num_trip_points: Number of maximum configurable trip points. + * @async: Flag for asynchronous read support. + * @update: Flag for continuouos update notification support. + * @timestamped: Flag for timestamped read support. + * @tstamp_scale: Power-of-10 multiplier applied to the sensor timestamps to + * represent it in seconds. + * @num_axis: Number of supported axis if any. Reported as 0 for scalar sensors. + * @axis: Pointer to an array of @num_axis descriptors. + * @intervals: Descriptor of available update intervals. + * @sensor_config: A bitmask reporting the current sensor configuration as + * detailed in the SCMI specification: it can accessed and + * modified through the accompanying macros. + * @name: NULL-terminated string representing sensor name as advertised by + * SCMI platform. + * @extended_scalar_attrs: Flag to indicate the presence of additional extended + * attributes for this sensor. + * @sensor_power: Extended attribute representing the average power + * consumed by the sensor in microwatts (uW) when it is active. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + * @resolution: Extended attribute representing the resolution of the sensor. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + * @exponent: Extended attribute representing the power-of-10 multiplier that is + * applied to the resolution field. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + * @scalar_attrs: Extended attributes representing minimum and maximum + * measurable values by this sensor. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + */ struct scmi_sensor_info { - u32 id; - u8 type; - s8 scale; - u8 num_trip_points; + unsigned int id; + unsigned int type; + int scale; + unsigned int num_trip_points; bool async; + bool update; + bool timestamped; + int tstamp_scale; + unsigned int num_axis; + struct scmi_sensor_axis_info *axis; + struct scmi_sensor_intervals_info intervals; char name[SCMI_MAX_STR_SIZE]; + bool extended_scalar_attrs; + unsigned int sensor_power; + unsigned int resolution; + int exponent; + struct scmi_range_attrs scalar_attrs; }; /* @@ -249,6 +372,14 @@ enum scmi_sensor_class { SQ_FEET = 0x54, SQ_CM = 0x55, SQ_METERS = 0x56, + RADIANS_SEC = 0x57, + BPM = 0x58, + METERS_SEC_SQUARED = 0x59, + METERS_SEC = 0x5A, + CUBIC_METERS_SEC = 0x5B, + MM_MERCURY = 0x5C, + RADIANS_SEC_SQUARED = 0x5D, + OEM_UNIT = 0xFF }; /** -- cgit v1.2.3 From c7b74967799b1af52b3045d69d4c26836b2d41de Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 20 Nov 2020 11:04:44 +0100 Subject: can: replace can_dlc as variable/element for payload length The naming of can_dlc as element of struct can_frame and also as variable name is misleading as it claims to be a 'data length CODE' but in reality it always was a plain data length. With the indroduction of a new 'len' element in struct can_frame we can now remove can_dlc as name and make clear which of the former uses was a plain length (-> 'len') or a data length code (-> 'dlc') value. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201120100444.3199-1-socketcan@hartkopp.net [mkl: gs_usb: keep struct gs_host_frame::can_dlc as is] Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 802606e36b58..77da061c21c9 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -185,8 +185,8 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, dev->mtu = CANFD_MTU; } -/* get data length from can_dlc with sanitized can_dlc */ -u8 can_dlc2len(u8 can_dlc); +/* get data length from raw data length code (DLC) */ +u8 can_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_len2dlc(u8 len); -- cgit v1.2.3 From 3ab4ce0d6fa8c93d41df4a74ec8d2c9198be2109 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 11:18:49 +0100 Subject: can: rename CAN FD related can_len2dlc and can_dlc2len helpers The helper functions can_len2dlc and can_dlc2len are only relevant for CAN FD data length code (DLC) conversion. To fit the introduced can_cc_dlc2len for Classical CAN we rename: can_dlc2len -> can_fd_dlc2len to get the payload length from the DLC can_len2dlc -> can_fd_len2dlc to get the DLC from the payload length Suggested-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110101852.1973-6-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 77da061c21c9..e767a96ae075 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -186,10 +186,10 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, } /* get data length from raw data length code (DLC) */ -u8 can_dlc2len(u8 dlc); +u8 can_fd_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ -u8 can_len2dlc(u8 len); +u8 can_fd_len2dlc(u8 len); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); -- cgit v1.2.3 From e8e73562ce0b24d691ad35df3de34b324248458f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 16:49:12 +0100 Subject: can: drivers: introduce helpers to access Classical CAN DLC values This patch adds the following helper to functions to access Classical CAN DLC values. can_get_cc_dlc(): get the data length code for Classical CAN raw DLC access can_frame_set_cc_len(): set len and len8_dlc value for Classical CAN raw DLC access Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110154913.1404582-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index e767a96ae075..197a79535cc2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -170,6 +170,31 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb) return skb->len == CANFD_MTU; } +/* helper to get the data length code (DLC) for Classical CAN raw DLC access */ +static inline u8 can_get_cc_dlc(const struct can_frame *cf, const u32 ctrlmode) +{ + /* return len8_dlc as dlc value only if all conditions apply */ + if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && + (cf->len == CAN_MAX_DLEN) && + (cf->len8_dlc > CAN_MAX_DLEN && cf->len8_dlc <= CAN_MAX_RAW_DLC)) + return cf->len8_dlc; + + /* return the payload length as dlc value */ + return cf->len; +} + +/* helper to set len and len8_dlc value for Classical CAN raw DLC access */ +static inline void can_frame_set_cc_len(struct can_frame *cf, const u8 dlc, + const u32 ctrlmode) +{ + /* the caller already ensured that dlc is a value from 0 .. 15 */ + if (ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC && dlc > CAN_MAX_DLEN) + cf->len8_dlc = dlc; + + /* limit the payload length 'len' to CAN_MAX_DLEN */ + cf->len = can_cc_dlc2len(dlc); +} + /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) -- cgit v1.2.3 From 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 11 Nov 2020 14:54:49 +0100 Subject: init/console: Use ttynull as a fallback when there is no console stdin, stdout, and stderr standard I/O stream are created for the init process. They are not available when there is no console registered for /dev/console. It might lead to a crash when the init process tries to use them, see the commit 48021f98130880dd742 ("printk: handle blank console arguments passed in."). Normally, ttySX and ttyX consoles are used as a fallback when no consoles are defined via the command line, device tree, or SPCR. But there will be no console registered when an invalid console name is configured or when the configured consoles do not exist on the system. Users even try to avoid the console intentionally, for example, by using console="" or console=null. It is used on production systems where the serial port or terminal are not visible to users. Pushing messages to these consoles would just unnecessary slowdown the system. Make sure that stdin, stdout, stderr, and /dev/console are always available by a fallback to the existing ttynull driver. It has been implemented for exactly this purpose but it was used only when explicitly configured. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20201111135450.11214-2-pmladek@suse.com --- include/linux/console.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 4b1e26c4cb42..9c662e41cde5 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -187,9 +187,12 @@ extern int braille_register_console(struct console *, int index, extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); +extern void register_ttynull_console(void); #else static inline void console_sysfs_notify(void) { } +static inline void register_ttynull_console(void) +{ } #endif extern bool console_suspend_enabled; -- cgit v1.2.3 From 341917490d7d68d2f7267a265b8820fc3f8ead1b Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Wed, 18 Nov 2020 23:49:20 +0100 Subject: PCI: Decode PCIe 64 GT/s link speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe r6.0, sec 7.5.3.18, defines a new 64.0 GT/s bit in the Supported Link Speeds Vector of Link Capabilities 2. This patch does not affect the speed of the link, which should be negotiated automatically by the hardware; it only adds decoding when showing the speed to the user. Decode this new speed. Previously, reading the speed of a link operating at this speed showed "Unknown speed" instead of "64.0 GT/s". Link: https://lore.kernel.org/r/aaaab33fe18975e123a84aebce2adb85f44e2bbe.1605739760.git.gustavo.pimentel@synopsys.com Signed-off-by: Gustavo Pimentel Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..e007bc3e8b6e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -281,6 +281,7 @@ enum pci_bus_speed { PCIE_SPEED_8_0GT = 0x16, PCIE_SPEED_16_0GT = 0x17, PCIE_SPEED_32_0GT = 0x18, + PCIE_SPEED_64_0GT = 0x19, PCI_SPEED_UNKNOWN = 0xff, }; -- cgit v1.2.3 From 956fb852181e4e4b16ccad62511e0038d3ed4928 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Nov 2020 17:14:14 +0530 Subject: octeontx2-pf: move lmt flush to include/linux/soc On OcteonTX2 platform CPT instruction enqueue and NIX packet send are only possible via LMTST operations which uses LDEOR instruction. This patch moves lmt flush function from OcteonTX2 nic driver to include/linux/soc since it will be used by OcteonTX2 CPT and NIC driver for LMTST. Signed-off-by: Suheil Chandran Signed-off-by: Srujana Challa Signed-off-by: Jakub Kicinski --- include/linux/soc/marvell/octeontx2/asm.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/soc/marvell/octeontx2/asm.h (limited to 'include/linux') diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h new file mode 100644 index 000000000000..ae2279fe830a --- /dev/null +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __SOC_OTX2_ASM_H +#define __SOC_OTX2_ASM_H + +#if defined(CONFIG_ARM64) +/* + * otx2_lmt_flush is used for LMT store operation. + * On octeontx2 platform CPT instruction enqueue and + * NIX packet send are only possible via LMTST + * operations and it uses LDEOR instruction targeting + * the coprocessor address. + */ +#define otx2_lmt_flush(ioaddr) \ +({ \ + u64 result = 0; \ + __asm__ volatile(".cpu generic+lse\n" \ + "ldeor xzr, %x[rf], [%[rs]]" \ + : [rf]"=r" (result) \ + : [rs]"r" (ioaddr)); \ + (result); \ +}) +#else +#define otx2_lmt_flush(ioaddr) ({ 0; }) +#endif + +#endif /* __SOC_OTX2_ASM_H */ -- cgit v1.2.3 From 0d8315dddd2899f519fe1ca3d4d5cdaf44ea421e Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Wed, 11 Nov 2020 07:33:54 -0600 Subject: seccomp/cache: Report cache data through /proc/pid/seccomp_cache Currently the kernel does not provide an infrastructure to translate architecture numbers to a human-readable name. Translating syscall numbers to syscall names is possible through FTRACE_SYSCALL infrastructure but it does not provide support for compat syscalls. This will create a file for each PID as /proc/pid/seccomp_cache. The file will be empty when no seccomp filters are loaded, or be in the format of: where ALLOW means the cache is guaranteed to allow the syscall, and filter means the cache will pass the syscall to the BPF filter. For the docker default profile on x86_64 it looks like: x86_64 0 ALLOW x86_64 1 ALLOW x86_64 2 ALLOW x86_64 3 ALLOW [...] x86_64 132 ALLOW x86_64 133 ALLOW x86_64 134 FILTER x86_64 135 FILTER x86_64 136 FILTER x86_64 137 ALLOW x86_64 138 ALLOW x86_64 139 FILTER x86_64 140 ALLOW x86_64 141 ALLOW [...] This file is guarded by CONFIG_SECCOMP_CACHE_DEBUG with a default of N because I think certain users of seccomp might not want the application to know which syscalls are definitely usable. For the same reason, it is also guarded by CAP_SYS_ADMIN. Suggested-by: Jann Horn Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@mail.gmail.com/ Signed-off-by: YiFei Zhu Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/94e663fa53136f5a11f432c661794d1ee7060779.1605101222.git.yifeifz2@illinois.edu --- include/linux/seccomp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 02aef2844c38..76963ec4641a 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -121,4 +121,11 @@ static inline long seccomp_get_metadata(struct task_struct *task, return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ + +#ifdef CONFIG_SECCOMP_CACHE_DEBUG +struct seq_file; + +int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +#endif #endif /* _LINUX_SECCOMP_H */ -- cgit v1.2.3 From 4ae21993f07422ec1cb83e9530f87fa61bff02bd Mon Sep 17 00:00:00 2001 From: Antonio Cardace Date: Wed, 18 Nov 2020 21:45:17 +0100 Subject: ethtool: add ETHTOOL_COALESCE_ALL_PARAMS define This bitmask represents all existing coalesce parameters. Signed-off-by: Antonio Cardace Reviewed-by: Michal Kubecek Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6408b446051f..e3da25b51ae4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -215,6 +215,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19) #define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20) #define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(21, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) -- cgit v1.2.3 From 5164c788985702ad94fa4ce6adca29bf280876df Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 29 Sep 2020 15:59:43 +0300 Subject: iio: triggered-buffer: add {devm_}iio_triggered_buffer_setup_ext variants This change adds a parameter to the {devm_}iio_triggered_buffer_setup() functions to assign the extra sysfs buffer attributes that are typically assigned via iio_buffer_set_attrs(). The functions also get renamed to iio_triggered_buffer_setup_ext() & devm_iio_triggered_buffer_setup_ext(). For backwards compatibility the old {devm_}iio_triggered_buffer_setup() functions are now macros wrap the new (renamed) functions with NULL for the buffer attrs. The aim is to remove iio_buffer_set_attrs(), so in the iio_triggered_buffer_setup_ext() function the attributes are assigned directly to 'buffer->attrs'. When adding multiple IIO buffers per IIO device, it can be pretty cumbersome to first allocate a set of buffers, then to dig them out of IIO to assign extra attributes (with iio_buffer_set_attrs()). Naturally, the best way would be to provide them at allocation time, which is what this change does. At this moment, buffers allocated with {devm_}iio_triggered_buffer_setup() are the only ones in mainline IIO to call iio_buffer_set_attrs(). Signed-off-by: Alexandru Ardelean Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200929125949.69934-4-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/triggered_buffer.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h index e99c91799359..7f154d1f8739 100644 --- a/include/linux/iio/triggered_buffer.h +++ b/include/linux/iio/triggered_buffer.h @@ -4,19 +4,28 @@ #include +struct attribute; struct iio_dev; struct iio_buffer_setup_ops; -int iio_triggered_buffer_setup(struct iio_dev *indio_dev, +int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, irqreturn_t (*h)(int irq, void *p), irqreturn_t (*thread)(int irq, void *p), - const struct iio_buffer_setup_ops *setup_ops); + const struct iio_buffer_setup_ops *setup_ops, + const struct attribute **buffer_attrs); void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev); -int devm_iio_triggered_buffer_setup(struct device *dev, - struct iio_dev *indio_dev, - irqreturn_t (*h)(int irq, void *p), - irqreturn_t (*thread)(int irq, void *p), - const struct iio_buffer_setup_ops *ops); +#define iio_triggered_buffer_setup(indio_dev, h, thread, setup_ops) \ + iio_triggered_buffer_setup_ext((indio_dev), (h), (thread), (setup_ops), NULL) + +int devm_iio_triggered_buffer_setup_ext(struct device *dev, + struct iio_dev *indio_dev, + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p), + const struct iio_buffer_setup_ops *ops, + const struct attribute **buffer_attrs); + +#define devm_iio_triggered_buffer_setup(dev, indio_dev, h, thread, setup_ops) \ + devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread), (setup_ops), NULL) #endif -- cgit v1.2.3 From 21232b4456ba5e1eea7385bd3c4b1994994fd409 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 29 Sep 2020 15:59:49 +0300 Subject: iio: buffer: remove iio_buffer_set_attrs() helper The iio_buffer_set_attrs() is no longer used in the drivers, so it can be removed now. Signed-off-by: Alexandru Ardelean Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200929125949.69934-10-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index fbba4093f06c..8febc23f5f26 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -11,9 +11,6 @@ struct iio_buffer; -void iio_buffer_set_attrs(struct iio_buffer *buffer, - const struct attribute **attrs); - int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data); /** -- cgit v1.2.3 From e2083d36739168f7b612312160cf7bb45b251408 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:04 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensors timestamped reads Add new .reading_get_timestamped() method to sensor_ops to support SCMI v3.0 timestamped reads. Link: https://lore.kernel.org/r/20201119174906.43862-5-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 0792b0be25a3..0c52bf0cbee4 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -149,6 +149,20 @@ struct scmi_power_ops { u32 *state); }; +/** + * scmi_sensor_reading - represent a timestamped read + * + * Used by @reading_get_timestamped method. + * + * @value: The signed value sensor read. + * @timestamp: An unsigned timestamp for the sensor read, as provided by + * SCMI platform. Set to zero when not available. + */ +struct scmi_sensor_reading { + long long value; + unsigned long long timestamp; +}; + /** * scmi_range_attrs - specifies a sensor or axis values' range * @min_range: The minimum value which can be represented by the sensor/axis. @@ -390,6 +404,11 @@ enum scmi_sensor_class { * @info_get: get the information of the specified sensor * @trip_point_config: selects and configures a trip-point of interest * @reading_get: gets the current value of the sensor + * @reading_get_timestamped: gets the current value and timestamp, when + * available, of the sensor. (as of v3.0 spec) + * Supports multi-axis sensors for sensors which + * supports it and if the @reading array size of + * @count entry equals the sensor num_axis */ struct scmi_sensor_ops { int (*count_get)(const struct scmi_handle *handle); @@ -399,6 +418,9 @@ struct scmi_sensor_ops { u32 sensor_id, u8 trip_id, u64 trip_value); int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id, u64 *value); + int (*reading_get_timestamped)(const struct scmi_handle *handle, + u32 sensor_id, u8 count, + struct scmi_sensor_reading *readings); }; /** -- cgit v1.2.3 From 7b83c5f41088987d04e24c3af0e1fb9f43b747b5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:05 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensor configuration support Add SCMI v3.0 sensor support for CONFIG_GET/CONFIG_SET commands. Link: https://lore.kernel.org/r/20201119174906.43862-6-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 0c52bf0cbee4..7e9e2cd3d46b 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -286,7 +286,38 @@ struct scmi_sensor_info { unsigned int num_axis; struct scmi_sensor_axis_info *axis; struct scmi_sensor_intervals_info intervals; + unsigned int sensor_config; +#define SCMI_SENS_CFG_UPDATE_SECS_MASK GENMASK(31, 16) +#define SCMI_SENS_CFG_GET_UPDATE_SECS(x) \ + FIELD_GET(SCMI_SENS_CFG_UPDATE_SECS_MASK, (x)) + +#define SCMI_SENS_CFG_UPDATE_EXP_MASK GENMASK(15, 11) +#define SCMI_SENS_CFG_GET_UPDATE_EXP(x) \ + ({ \ + int __signed_exp = \ + FIELD_GET(SCMI_SENS_CFG_UPDATE_EXP_MASK, (x)); \ + \ + if (__signed_exp & BIT(4)) \ + __signed_exp |= GENMASK(31, 5); \ + __signed_exp; \ + }) + +#define SCMI_SENS_CFG_ROUND_MASK GENMASK(10, 9) +#define SCMI_SENS_CFG_ROUND_AUTO 2 +#define SCMI_SENS_CFG_ROUND_UP 1 +#define SCMI_SENS_CFG_ROUND_DOWN 0 + +#define SCMI_SENS_CFG_TSTAMP_ENABLED_MASK BIT(1) +#define SCMI_SENS_CFG_TSTAMP_ENABLE 1 +#define SCMI_SENS_CFG_TSTAMP_DISABLE 0 +#define SCMI_SENS_CFG_IS_TSTAMP_ENABLED(x) \ + FIELD_GET(SCMI_SENS_CFG_TSTAMP_ENABLED_MASK, (x)) + +#define SCMI_SENS_CFG_SENSOR_ENABLED_MASK BIT(0) +#define SCMI_SENS_CFG_SENSOR_ENABLE 1 +#define SCMI_SENS_CFG_SENSOR_DISABLE 0 char name[SCMI_MAX_STR_SIZE]; +#define SCMI_SENS_CFG_IS_ENABLED(x) FIELD_GET(BIT(0), (x)) bool extended_scalar_attrs; unsigned int sensor_power; unsigned int resolution; @@ -409,6 +440,8 @@ enum scmi_sensor_class { * Supports multi-axis sensors for sensors which * supports it and if the @reading array size of * @count entry equals the sensor num_axis + * @config_get: Get sensor current configuration + * @config_set: Set sensor current configuration */ struct scmi_sensor_ops { int (*count_get)(const struct scmi_handle *handle); @@ -421,6 +454,10 @@ struct scmi_sensor_ops { int (*reading_get_timestamped)(const struct scmi_handle *handle, u32 sensor_id, u8 count, struct scmi_sensor_reading *readings); + int (*config_get)(const struct scmi_handle *handle, + u32 sensor_id, u32 *sensor_config); + int (*config_set)(const struct scmi_handle *handle, + u32 sensor_id, u32 sensor_config); }; /** -- cgit v1.2.3 From e3811190acf85c63518fbddaa28bcbfab2baa58d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:06 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensor notifications Add support for new SCMI v3.0 SENSOR_UPDATE notification. Link: https://lore.kernel.org/r/20201119174906.43862-7-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 7e9e2cd3d46b..be0be5ff7514 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -657,6 +657,7 @@ enum scmi_notification_events { SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED = 0x0, SCMI_EVENT_PERFORMANCE_LEVEL_CHANGED = 0x1, SCMI_EVENT_SENSOR_TRIP_POINT_EVENT = 0x0, + SCMI_EVENT_SENSOR_UPDATE = 0x1, SCMI_EVENT_RESET_ISSUED = 0x0, SCMI_EVENT_BASE_ERROR_EVENT = 0x0, SCMI_EVENT_SYSTEM_POWER_STATE_NOTIFIER = 0x0, @@ -698,6 +699,14 @@ struct scmi_sensor_trip_point_report { unsigned int trip_point_desc; }; +struct scmi_sensor_update_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int sensor_id; + unsigned int readings_count; + struct scmi_sensor_reading readings[]; +}; + struct scmi_reset_issued_report { ktime_t timestamp; unsigned int agent_id; -- cgit v1.2.3 From bc2dc4406c463174613047d8b7946e12c8808cda Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 21 Nov 2020 22:17:01 -0800 Subject: compiler-clang: remove version check for BPF Tracing bpftrace parses the kernel headers and uses Clang under the hood. Remove the version check when __BPF_TRACING__ is defined (as bpftrace does) so that this tool can continue to parse kernel headers, even with older clang sources. Fixes: commit 1f7a44f63e6c ("compiler-clang: add build check for clang 10.0.1") Reported-by: Chen Yu Reported-by: Jarkko Sakkinen Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Acked-by: Song Liu Acked-by: Nathan Chancellor Acked-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20201104191052.390657-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index dd7233c48bf3..98cff1b4b088 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -8,8 +8,10 @@ + __clang_patchlevel__) #if CLANG_VERSION < 100001 +#ifndef __BPF_TRACING__ # error Sorry, your version of Clang is too old - please use 10.0.1 or newer. #endif +#endif /* Compiler specific definitions for Clang compiler */ -- cgit v1.2.3 From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 21 Nov 2020 22:17:05 -0800 Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid() exports The core-mm has a default __weak implementation of phys_to_target_node() to mirror the weak definition of memory_add_physaddr_to_nid(). That symbol is exported for modules. However, while the export in mm/memory_hotplug.c exported the symbol in the configuration cases of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=y ...and: CONFIG_NUMA_KEEP_MEMINFO=n CONFIG_MEMORY_HOTPLUG=y ...it failed to export the symbol in the case of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=n Not only is that broken, but Christoph points out that the kernel should not be exporting any __weak symbol, which means that memory_add_physaddr_to_nid() example that phys_to_target_node() copied is broken too. Rework the definition of phys_to_target_node() and memory_add_physaddr_to_nid() to not require weak symbols. Move to the common arch override design-pattern of an asm header defining a symbol to replace the default implementation. The only common header that all memory_add_physaddr_to_nid() producing architectures implement is asm/sparsemem.h. In fact, powerpc already defines its memory_add_physaddr_to_nid() helper in sparsemem.h. Double-down on that observation and define phys_to_target_node() where necessary in asm/sparsemem.h. An alternate consideration that was discarded was to put this override in asm/numa.h, but that entangles with the definition of MAX_NUMNODES relative to the inclusion of linux/nodemask.h, and requires powerpc to grow a new header. The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid now that the symbol is properly exported / stubbed in all combinations of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG. [dan.j.williams@intel.com: v4] Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com [dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning] Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation") Reported-by: Randy Dunlap Reported-by: Thomas Gleixner Reported-by: kernel test robot Reported-by: Christoph Hellwig Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Cc: Joao Martins Cc: Tony Luck Cc: Fenghua Yu Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vishal Verma Cc: Stephen Rothwell Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 14 -------------- include/linux/numa.h | 30 +++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..551093b74596 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -extern int phys_to_target_node(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -static inline int phys_to_target_node(u64 start) -{ - return 0; -} -#endif - #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index 8cb33ccfb671..cb44cfe2b725 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -21,13 +21,41 @@ #endif #ifdef CONFIG_NUMA +#include +#include + /* Generic implementation available */ int numa_map_to_online_node(int node); -#else + +#ifndef memory_add_physaddr_to_nid +static inline int memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#ifndef phys_to_target_node +static inline int phys_to_target_node(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} #endif #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 21 Nov 2020 22:17:08 -0800 Subject: mm: fix readahead_page_batch for retry entries Both btrfs and fuse have reported faults caused by seeing a retry entry instead of the page they were looking for. This was caused by a missing check in the iterator. As can be seen in the below panic log, the accessing 0x402 causes a panic. In the xarray.h, 0x402 means RETRY_ENTRY. BUG: kernel NULL pointer dereference, address: 0000000000000402 CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1 Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020 RIP: 0010:fuse_readahead+0x152/0x470 [fuse] Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01 RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246 RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60 RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0 R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68 FS: 00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0 Call Trace: read_pages+0x83/0x270 page_cache_readahead_unbounded+0x197/0x230 generic_file_buffered_read+0x57a/0xa20 new_sync_read+0x112/0x1a0 vfs_read+0xf8/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 042124cc64c3 ("mm: add new readahead_control API") Reported-by: David Sterba Reported-by: Wonhyuk Yang Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1e19c1f9ec9..d5570deff400 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, page)) + continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; -- cgit v1.2.3 From e44cdff05145b84293e3f424daa17e4f3ce0109c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 19 Nov 2020 17:45:09 +0100 Subject: clk: samsung: Allow compile testing of Exynos, S3C64xx and S5Pv210 So far all Exynos, S3C64xx and S5Pv210 clock units were selected by respective SOC/ARCH Kconfig option. On a kernel built for selected SoCs, this allowed to build only limited set of matching clock drivers. However compile testing was not possible in such case as Makefile object depends on SOC/ARCH option. Add separate Kconfig options for each of them to be able to compile test. Link: https://lore.kernel.org/r/20201119164509.754851-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki --- include/linux/clk/samsung.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/samsung.h b/include/linux/clk/samsung.h index 79097e365f7f..38b774001712 100644 --- a/include/linux/clk/samsung.h +++ b/include/linux/clk/samsung.h @@ -10,7 +10,7 @@ struct device_node; -#ifdef CONFIG_ARCH_S3C64XX +#ifdef CONFIG_S3C64XX_COMMON_CLK void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, unsigned long xusbxti_f, bool s3c6400, void __iomem *base); @@ -19,7 +19,7 @@ static inline void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, unsigned long xusbxti_f, bool s3c6400, void __iomem *base) { } -#endif /* CONFIG_ARCH_S3C64XX */ +#endif /* CONFIG_S3C64XX_COMMON_CLK */ #ifdef CONFIG_S3C2410_COMMON_CLK void s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, -- cgit v1.2.3 From 769dda58d1f647a45270db2f02efe2e2de856709 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:10 +0100 Subject: irqstat: Get rid of nmi_count() and __IRQ_STAT() Nothing uses this anymore. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.005212732@linutronix.de --- include/linux/irq_cpustat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h index 6e8895cd4d92..78fb2de3ea4d 100644 --- a/include/linux/irq_cpustat.h +++ b/include/linux/irq_cpustat.h @@ -19,10 +19,6 @@ #ifndef __ARCH_IRQ_STAT DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); /* defined in asm/hardirq.h */ -#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat.member, cpu)) #endif -/* arch dependent irq_stat fields */ -#define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */ - #endif /* __irq_cpustat_h */ -- cgit v1.2.3 From e091bc90cd2d65f48e4688faead2911558d177d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:16 +0100 Subject: irqstat: Move declaration into asm-generic/hardirq.h Move the declaration of the irq_cpustat per cpu variable to asm-generic/hardirq.h and remove the now empty linux/irq_cpustat.h header. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.737377332@linutronix.de --- include/linux/irq_cpustat.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/irq_cpustat.h (limited to 'include/linux') diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h deleted file mode 100644 index 78fb2de3ea4d..000000000000 --- a/include/linux/irq_cpustat.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __irq_cpustat_h -#define __irq_cpustat_h - -/* - * Contains default mappings for irq_cpustat_t, used by almost every - * architecture. Some arch (like s390) have per cpu hardware pages and - * they define their own mappings for irq_stat. - * - * Keith Owens July 2000. - */ - - -/* - * Simple wrappers reducing source bloat. Define all irq_stat fields - * here, even ones that are arch dependent. That way we get common - * definitions instead of differing sets for each arch. - */ - -#ifndef __ARCH_IRQ_STAT -DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); /* defined in asm/hardirq.h */ -#endif - -#endif /* __irq_cpustat_h */ -- cgit v1.2.3 From 15115830c88751ba83068aa37da996602ddc6a61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:17 +0100 Subject: preempt: Cleanup the macro maze a bit Make the macro maze consistent and prepare it for adding the RT variant for BH accounting. - Use nmi_count() for the NMI portion of preempt count - Introduce in_hardirq() to make the naming consistent and non-ambiguos - Use the macros to create combined checks (e.g. in_task()) so the softirq representation for RT just falls into place. - Update comments and move the deprecated macros aside Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.864469886@linutronix.de --- include/linux/preempt.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7d9c1c0e149c..7547857516d5 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -77,31 +77,33 @@ /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include +#define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ - | NMI_MASK)) +#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* - * Are we doing bottom half or hardware interrupt processing? + * Macros to retrieve the current execution context: * - * in_irq() - We're in (hard) IRQ context + * in_nmi() - We're in NMI context + * in_hardirq() - We're in hard IRQ context + * in_serving_softirq() - We're in softirq context + * in_task() - We're in task context + */ +#define in_nmi() (nmi_count()) +#define in_hardirq() (hardirq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) +#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) + +/* + * The following macros are deprecated and should not be used in new code: + * in_irq() - Obsolete version of in_hardirq() * in_softirq() - We have BH disabled, or are processing softirqs * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled - * in_serving_softirq() - We're in softirq context - * in_nmi() - We're in NMI context - * in_task() - We're in task context - * - * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really - * should not be used in new code. */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_nmi() (preempt_count() & NMI_MASK) -#define in_task() (!(preempt_count() & \ - (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) /* * The preempt_count offset after preempt_disable(); -- cgit v1.2.3 From cb4789b0d19ff231ce9f73376a023341300aed96 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 6 Nov 2020 16:50:47 +0100 Subject: iommu/ioasid: Add ioasid references Let IOASID users take references to existing ioasids with ioasid_get(). ioasid_put() drops a reference and only frees the ioasid when its reference number is zero. It returns true if the ioasid was freed. For drivers that don't call ioasid_get(), ioasid_put() is the same as ioasid_free(). Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20201106155048.997886-2-jean-philippe@linaro.org Signed-off-by: Will Deacon --- include/linux/ioasid.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 6f000d7a0ddc..e9dacd4b9f6b 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,7 +34,8 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_free(ioasid_t ioasid); +void ioasid_get(ioasid_t ioasid); +bool ioasid_put(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -48,10 +49,15 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_free(ioasid_t ioasid) +static inline void ioasid_get(ioasid_t ioasid) { } +static inline bool ioasid_put(ioasid_t ioasid) +{ + return false; +} + static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) { -- cgit v1.2.3 From b713c195d59332277a31a59c91f755e53b5b302b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 5 Sep 2020 11:13:35 -0600 Subject: net: provide __sys_shutdown_sock() that takes a socket No functional changes in this patch, needed to provide io_uring support for shutdown(2). Cc: netdev@vger.kernel.org Cc: David S. Miller Acked-by: Jakub Kicinski Signed-off-by: Jens Axboe --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index e9cb30d8cbfb..385894b4a8bb 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); extern struct ns_common *get_net_ns(struct ns_common *ns); -- cgit v1.2.3 From 23acdc76f1798b090bb9dcc90671cd29d929834e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 12 Nov 2020 18:53:34 -0800 Subject: signal: clear non-uapi flag bits when passing/returning sa_flags Previously we were not clearing non-uapi flag bits in sigaction.sa_flags when storing the userspace-provided sa_flags or when returning them via oldact. Start doing so. This allows userspace to detect missing support for flag bits and allows the kernel to use non-uapi bits internally, as we are already doing in arch/x86 for two flag bits. Now that this change is in place, we no longer need the code in arch/x86 that was hiding these bits from userspace, so remove it. This is technically a userspace-visible behavior change for sigaction, as the unknown bits returned via oldact.sa_flags are no longer set. However, we are free to define the behavior for unknown bits exactly because their behavior is currently undefined, so for now we can define the meaning of each of them to be "clear the bit in oldact.sa_flags unless the bit becomes known in the future". Furthermore, this behavior is consistent with OpenBSD [1], illumos [2] and XNU [3] (FreeBSD [4] and NetBSD [5] fail the syscall if unknown bits are set). So there is some precedent for this behavior in other kernels, and in particular in XNU, which is probably the most popular kernel among those that I looked at, which means that this change is less likely to be a compatibility issue. Link: [1] https://github.com/openbsd/src/blob/f634a6a4b5bf832e9c1de77f7894ae2625e74484/sys/kern/kern_sig.c#L278 Link: [2] https://github.com/illumos/illumos-gate/blob/76f19f5fdc974fe5be5c82a556e43a4df93f1de1/usr/src/uts/common/syscall/sigaction.c#L86 Link: [3] https://github.com/apple/darwin-xnu/blob/a449c6a3b8014d9406c2ddbdc81795da24aa7443/bsd/kern/kern_sig.c#L480 Link: [4] https://github.com/freebsd/freebsd/blob/eded70c37057857c6e23fae51f86b8f8f43cd2d0/sys/kern/kern_sig.c#L699 Link: [5] https://github.com/NetBSD/src/blob/3365779becdcedfca206091a645a0e8e22b2946e/sys/kern/sys_sig.c#L473 Signed-off-by: Peter Collingbourne Reviewed-by: Dave Martin Acked-by: "Eric W. Biederman" Link: https://linux-review.googlesource.com/id/I35aab6f5be932505d90f3b3450c083b4db1eca86 Link: https://lkml.kernel.org/r/878dbcb5f47bc9b11881c81f745c0bef5c23f97f.1605235762.git.pcc@google.com Signed-off-by: Eric W. Biederman --- include/linux/signal_types.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index f8a90ae9c6ec..a7887ad84d36 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -68,4 +68,16 @@ struct ksignal { int sig; }; +#ifndef __ARCH_UAPI_SA_FLAGS +#ifdef SA_RESTORER +#define __ARCH_UAPI_SA_FLAGS SA_RESTORER +#else +#define __ARCH_UAPI_SA_FLAGS 0 +#endif +#endif + +#define UAPI_SA_FLAGS \ + (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART | \ + SA_NODEFER | SA_RESETHAND | __ARCH_UAPI_SA_FLAGS) + #endif /* _LINUX_SIGNAL_TYPES_H */ -- cgit v1.2.3 From 6ac05e832a9e96f9b1c42a8917cdd317d7b6c8fa Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 20 Nov 2020 12:33:45 -0800 Subject: signal: define the SA_EXPOSE_TAGBITS bit in sa_flags Architectures that support address tagging, such as arm64, may want to expose fault address tag bits to the signal handler to help diagnose memory errors. However, these bits have not been previously set, and their presence may confuse unaware user applications. Therefore, introduce a SA_EXPOSE_TAGBITS flag bit in sa_flags that a signal handler may use to explicitly request that the bits are set. The generic signal handler APIs expect to receive tagged addresses. Architectures may specify how to untag addresses in the case where SA_EXPOSE_TAGBITS is clear by defining the arch_untagged_si_addr function. Signed-off-by: Peter Collingbourne Acked-by: "Eric W. Biederman" Link: https://linux-review.googlesource.com/id/I16dd0ed2081f091fce97be0190cb8caa874c26cb Link: https://lkml.kernel.org/r/13cf24d00ebdd8e1f55caf1821c7c29d54100191.1605904350.git.pcc@google.com Signed-off-by: Eric W. Biederman --- include/linux/signal.h | 14 ++++++++++++++ include/linux/signal_types.h | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index b256f9c65661..205526c4003a 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -469,4 +469,18 @@ struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); #endif +#ifndef arch_untagged_si_addr +/* + * Given a fault address and a signal and si_code which correspond to the + * _sigfault union member, returns the address that must appear in si_addr if + * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags. + */ +static inline void __user *arch_untagged_si_addr(void __user *addr, + unsigned long sig, + unsigned long si_code) +{ + return addr; +} +#endif + #endif /* _LINUX_SIGNAL_H */ diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index a7887ad84d36..68e06c75c5b2 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -78,6 +78,6 @@ struct ksignal { #define UAPI_SA_FLAGS \ (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART | \ - SA_NODEFER | SA_RESETHAND | __ARCH_UAPI_SA_FLAGS) + SA_NODEFER | SA_RESETHAND | SA_EXPOSE_TAGBITS | __ARCH_UAPI_SA_FLAGS) #endif /* _LINUX_SIGNAL_TYPES_H */ -- cgit v1.2.3 From 4e1d9a737d00f2cc811dc5654f82c92c7d80e98c Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 19 Nov 2020 08:25:39 +0100 Subject: PM: sleep: Add dev_wakeup_path() helper Add dev_wakeup_path() helper to avoid to spread dev->power.wakeup_path test in drivers. Signed-off-by: Patrice Chotard Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index aa3da6611533..196a157456aa 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -84,6 +84,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && !!dev->power.wakeup; } +static inline bool device_wakeup_path(struct device *dev) +{ + return dev->power.wakeup_path; +} + static inline void device_set_wakeup_path(struct device *dev) { dev->power.wakeup_path = true; @@ -174,6 +179,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && dev->power.should_wakeup; } +static inline bool device_wakeup_path(struct device *dev) +{ + return false; +} + static inline void device_set_wakeup_path(struct device *dev) {} static inline void __pm_stay_awake(struct wakeup_source *ws) {} -- cgit v1.2.3 From a94ef811f7c3748736b85db0406da8e4ea391ac6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Thu, 19 Nov 2020 09:43:25 -0700 Subject: PM: domains: replace -ENOTSUPP with -EOPNOTSUPP While submitting a patch to add next_wakeup, checkpatch reported this - WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP + return -ENOTSUPP; Address the above warning in other functions in pm_domain.h. Reviewed-by: Ulf Hansson Signed-off-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ad0ec481416..5fd20d117cbe 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -255,24 +255,24 @@ static inline int pm_genpd_init(struct generic_pm_domain *genpd, } static inline int pm_genpd_remove(struct generic_pm_domain *genpd) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_remove_notifier(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) @@ -325,13 +325,13 @@ struct device *genpd_dev_pm_attach_by_name(struct device *dev, static inline int of_genpd_add_provider_simple(struct device_node *np, struct generic_pm_domain *genpd) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline void of_genpd_del_provider(struct device_node *np) {} @@ -387,7 +387,7 @@ static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev, static inline struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ -- cgit v1.2.3 From 8eb621698fd4c49703d512fe437d84ab822bc59e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Sep 2020 11:12:03 +0100 Subject: keys: Provide the original description to the key preparser Provide the proposed description (add key) or the original description (update/instantiate key) when preparsing a key so that the key type can validate it against the data. This is important for rxrpc server keys as we need to check that they have the right amount of key material present - and it's better to do that when the key is loaded rather than deep in trying to process a response packet. Signed-off-by: David Howells cc: Jarkko Sakkinen cc: keyrings@vger.kernel.org --- include/linux/key-type.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 2ab2d6d6aeab..7d985a1dfe4a 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -29,6 +29,7 @@ struct kernel_pkey_params; * clear the contents. */ struct key_preparsed_payload { + const char *orig_description; /* Actual or proposed description (maybe NULL) */ char *description; /* Proposed key description (or NULL) */ union key_payload payload; /* Proposed payload */ const void *data; /* Raw data */ -- cgit v1.2.3 From 076d38b88c4146fd5506933d440b881741b6ab60 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Fri, 20 Nov 2020 09:41:04 +0100 Subject: net: ptp: introduce common defines for PTP message types Using PTP wide defines will obsolete different driver internal defines and uses of magic numbers. Signed-off-by: Christian Eggers Cc: Kurt Kanzenbach Reviewed-by: Vladimir Oltean Reviewed-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/linux/ptp_classify.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index c6487b7ab026..ae04968a3a47 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -31,6 +31,11 @@ #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) #define PTP_CLASS_L4 (PTP_CLASS_IPV4 | PTP_CLASS_IPV6) +#define PTP_MSGTYPE_SYNC 0x0 +#define PTP_MSGTYPE_DELAY_REQ 0x1 +#define PTP_MSGTYPE_PDELAY_REQ 0x2 +#define PTP_MSGTYPE_PDELAY_RESP 0x3 + #define PTP_EV_PORT 319 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ @@ -140,7 +145,7 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, /* The return is meaningless. The stub function would not be * executed since no available header from ptp_parse_header. */ - return 0; + return PTP_MSGTYPE_SYNC; } #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From 3df98d79215ace13d1e91ddfc5a67a0f5acbd83f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sun, 27 Sep 2020 22:38:26 -0400 Subject: lsm,selinux: pass flowi_common instead of flowi to the LSM hooks As pointed out by Herbert in a recent related patch, the LSM hooks do not have the necessary address family information to use the flowi struct safely. As none of the LSMs currently use any of the protocol specific flowi information, replace the flowi pointers with pointers to the address family independent flowi_common struct. Reported-by: Herbert Xu Acked-by: James Morris Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 23 ++++++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 32a940117e7a..f70984c8318b 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -311,7 +311,7 @@ LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, - struct flowi *fl) + struct flowi_common *flic) LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) @@ -351,7 +351,7 @@ LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, const struct flowi_common *flic) LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, int ckall) #endif /* CONFIG_SECURITY_NETWORK_XFRM */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c503f7ab8afb..a19adef1f088 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1105,7 +1105,7 @@ * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. + * @flic contains the flowi_common struct to check for a match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. diff --git a/include/linux/security.h b/include/linux/security.h index bc2725491560..469273e8b46d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -167,7 +167,7 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; -struct flowi; +struct flowi_common; struct dst_entry; struct xfrm_selector; struct xfrm_policy; @@ -1355,8 +1355,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); int security_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1507,11 +1508,13 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +static inline void security_sk_classify_flow(struct sock *sk, + struct flowi_common *flic) { } -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +static inline void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { } @@ -1638,9 +1641,9 @@ void security_xfrm_state_free(struct xfrm_state *x); int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic); #else /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -1692,7 +1695,8 @@ static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_s } static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, + const struct flowi_common *flic) { return 1; } @@ -1702,7 +1706,8 @@ static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return 0; } -static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +static inline void security_skb_classify_flow(struct sk_buff *skb, + struct flowi_common *flic) { } -- cgit v1.2.3 From cc69837fcaf467426ca19e5790085c26146a2300 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Nov 2020 14:50:52 -0800 Subject: net: don't include ethtool.h from netdevice.h linux/netdevice.h is included in very many places, touching any of its dependecies causes large incremental builds. Drop the linux/ethtool.h include, linux/netdevice.h just needs a forward declaration of struct ethtool_ops. Fix all the places which made use of this implicit include. Acked-by: Johannes Berg Acked-by: Shannon Nelson Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20201120225052.1427503-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- include/linux/qed/qed_if.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03433a4c929e..0049e8fe4905 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -34,7 +34,6 @@ #include #include -#include #include #ifdef CONFIG_DCB #include @@ -51,6 +50,7 @@ struct netpoll_info; struct device; +struct ethtool_ops; struct phy_device; struct dsa_port; struct ip_tunnel_parm; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 57fb295ea41a..68d17a4fbf20 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -7,6 +7,7 @@ #ifndef _QED_IF_H #define _QED_IF_H +#include #include #include #include -- cgit v1.2.3 From d549699048b4b5c22dd710455bcdb76966e55aa3 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 21 Nov 2020 08:28:17 +0200 Subject: net/packet: fix packet receive on L3 devices without visible hard header In the patchset merged by commit b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which did not have header_ops were given one for the purpose of protocol parsing on af_packet transmit path. That change made af_packet receive path regard these devices as having a visible L3 header and therefore aligned incoming skb->data to point to the skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not reset their mac_header prior to ingress and therefore their incoming packets became malformed. Ideally these devices would reset their mac headers, or af_packet would be able to rely on dev->hard_header_len being 0 for such cases, but it seems this is not the case. Fix by changing af_packet RX ll visibility criteria to include the existence of a '.create()' header operation, which is used when creating a device hard header - via dev_hard_header() - by upper layers, and does not exist in these L3 devices. As this predicate may be useful in other situations, add it as a common dev_has_header() helper in netdevice.h. Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") Signed-off-by: Eyal Birger Acked-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..fa275a054f46 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3137,6 +3137,11 @@ static inline bool dev_validate_header(const struct net_device *dev, return false; } +static inline bool dev_has_header(const struct net_device *dev) +{ + return dev->header_ops && dev->header_ops->create; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len, int size); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -- cgit v1.2.3 From b7cab9be7c16128a0de21ed7ae67211838813437 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Nov 2020 23:23:58 +0800 Subject: soundwire: SDCA: detect sdca_cascade interrupt The SoundWire 1.2 specification defines an "SDCA cascade" bit which handles a logical OR of all SDCA interrupt sources (up to 30 defined). Due to limitations of the addressing space, this bit is located in the SDW_DP0_INT register when DP0 is used, or alternatively in the DP0_SDCA_Support_INTSTAT register when DP0 is not used. To allow for both cases to be handled, this bit will be checked in the main device-level interrupt handling code. This will result in the register being read twice if DP0 is enabled, but it's not clear how to optimize this case. It's also more logical to deal with this interrupt at the device than the port level, this bit is really not DP0 specific and its location in the DP0_INTSTAT bit is only due to the lack of free space in SCP_INTSTAT_1. The SDCA_Cascade bit cannot be masked or cleared, so the interrupt handling only forwards the detection to the Slave driver, which will deal with reading the relevant SDCA status bits and clearing them. The bus driver only signals the detection. The communication with the Slave driver is based on the same interrupt callback, with only an extension to provide the status of the sdca_cascade bit. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20201104152358.9518-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 41cc1192f9aa..f0b01b728640 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -359,6 +359,7 @@ struct sdw_dpn_prop { * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @is_sdca: the Slave supports the SDCA specification */ struct sdw_slave_prop { u32 mipi_revision; @@ -382,6 +383,7 @@ struct sdw_slave_prop { struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; u32 quirks; + bool is_sdca; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) @@ -479,10 +481,12 @@ struct sdw_slave_id { /** * struct sdw_slave_intr_status - Slave interrupt status + * @sdca_cascade: set if the Slave device reports an SDCA interrupt * @control_port: control port status * @port: data port status */ struct sdw_slave_intr_status { + bool sdca_cascade; u8 control_port; u8 port[15]; }; -- cgit v1.2.3 From 74d862b682f51e45d25b95b1ecf212428a4967b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:42 +0100 Subject: sched: Make migrate_disable/enable() independent of RT Now that the scheduler can deal with migrate disable properly, there is no real compelling reason to make it only available for RT. There are quite some code pathes which needlessly disable preemption in order to prevent migration and some constructs like kmap_atomic() enforce it implicitly. Making it available independent of RT allows to provide a preemptible variant of kmap_atomic() and makes the code more consistent in general. Signed-off-by: Thomas Gleixner Grudgingly-Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.269943012@linutronix.de --- include/linux/kernel.h | 21 ++++++++++++++------- include/linux/preempt.h | 38 +++----------------------------------- include/linux/sched.h | 2 +- 3 files changed, 18 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f05e9128201..665837f9a831 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -204,6 +204,7 @@ extern int _cond_resched(void); extern void ___might_sleep(const char *file, int line, int preempt_offset); extern void __might_sleep(const char *file, int line, int preempt_offset); extern void __cant_sleep(const char *file, int line, int preempt_offset); +extern void __cant_migrate(const char *file, int line); /** * might_sleep - annotation for functions that can sleep @@ -227,6 +228,18 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); # define cant_sleep() \ do { __cant_sleep(__FILE__, __LINE__, 0); } while (0) # define sched_annotate_sleep() (current->task_state_change = 0) + +/** + * cant_migrate - annotation for functions that cannot migrate + * + * Will print a stack trace if executed in code which is migratable + */ +# define cant_migrate() \ + do { \ + if (IS_ENABLED(CONFIG_SMP)) \ + __cant_migrate(__FILE__, __LINE__); \ + } while (0) + /** * non_block_start - annotate the start of section where sleeping is prohibited * @@ -251,6 +264,7 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) +# define cant_migrate() do { } while (0) # define sched_annotate_sleep() do { } while (0) # define non_block_start() do { } while (0) # define non_block_end() do { } while (0) @@ -258,13 +272,6 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -#ifndef CONFIG_PREEMPT_RT -# define cant_migrate() cant_sleep() -#else - /* Placeholder for now */ -# define cant_migrate() do { } while (0) -#endif - /** * abs - return absolute value of an argument * @x: the value. If it is unsigned type, it is converted to signed type first. diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8b43922e65df..6df63cbe8bb0 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,7 +322,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP /* * Migrate-Disable and why it is undesired. @@ -382,43 +382,11 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, extern void migrate_disable(void); extern void migrate_enable(void); -#elif defined(CONFIG_PREEMPT_RT) +#else static inline void migrate_disable(void) { } static inline void migrate_enable(void) { } -#else /* !CONFIG_PREEMPT_RT */ - -/** - * migrate_disable - Prevent migration of the current task - * - * Maps to preempt_disable() which also disables preemption. Use - * migrate_disable() to annotate that the intent is to prevent migration, - * but not necessarily preemption. - * - * Can be invoked nested like preempt_disable() and needs the corresponding - * number of migrate_enable() invocations. - */ -static __always_inline void migrate_disable(void) -{ - preempt_disable(); -} - -/** - * migrate_enable - Allow migration of the current task - * - * Counterpart to migrate_disable(). - * - * As migrate_disable() can be invoked nested, only the outermost invocation - * reenables migration. - * - * Currently mapped to preempt_enable(). - */ -static __always_inline void migrate_enable(void) -{ - preempt_enable(); -} - -#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ +#endif /* CONFIG_SMP */ #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3af9d52fe093..a33f35f68060 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -715,7 +715,7 @@ struct task_struct { const cpumask_t *cpus_ptr; cpumask_t cpus_mask; void *migration_pending; -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP unsigned short migration_disabled; #endif unsigned short migration_flags; -- cgit v1.2.3 From 5fbda3ecd14a5343644979c98d6eb65b7e7de9d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:43 +0100 Subject: sched: highmem: Store local kmaps in task struct Instead of storing the map per CPU provide and use per task storage. That prepares for local kmaps which are preemptible. The context switch code is preparatory and not yet in use because kmap_atomic() runs with preemption disabled. Will be made usable in the next step. The context switch logic is safe even when an interrupt happens after clearing or before restoring the kmaps. The kmap index in task struct is not modified so any nesting kmap in an interrupt will use unused indices and on return the counter is the same as before. Also add an assert into the return to user space code. Going back to user space with an active kmap local is a nono. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.372935758@linutronix.de --- include/linux/highmem-internal.h | 10 ++++++++++ include/linux/sched.h | 9 +++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 6ceed907b14e..c5a22177db85 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -9,6 +9,16 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); void kunmap_local_indexed(void *vaddr); +void kmap_local_fork(struct task_struct *tsk); +void __kmap_local_sched_out(void); +void __kmap_local_sched_in(void); +static inline void kmap_assert_nomap(void) +{ + DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx); +} +#else +static inline void kmap_local_fork(struct task_struct *tsk) { } +static inline void kmap_assert_nomap(void) { } #endif #ifdef CONFIG_HIGHMEM diff --git a/include/linux/sched.h b/include/linux/sched.h index a33f35f68060..8946911cee9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -629,6 +630,13 @@ struct wake_q_node { struct wake_q_node *next; }; +struct kmap_ctrl { +#ifdef CONFIG_KMAP_LOCAL + int idx; + pte_t pteval[KM_MAX_IDX]; +#endif +}; + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -1294,6 +1302,7 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif + struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif -- cgit v1.2.3 From f3ba3c710ac5a30cd058615a9eb62d2ad95bb782 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:44 +0100 Subject: mm/highmem: Provide kmap_local* Now that the kmap atomic index is stored in task struct provide a preemptible variant. On context switch the maps of an outgoing task are removed and the map of the incoming task are restored. That's obviously slow, but highmem is slow anyway. The kmap_local.*() functions can be invoked from both preemptible and atomic context. kmap local sections disable migration to keep the resulting virtual mapping address correct, but disable neither pagefaults nor preemption. A wholesale conversion of kmap_atomic to be fully preemptible is not possible because some of the usage sites might rely on the preemption disable for serialization or on the implicit pagefault disable. Needs to be done on a case by case basis. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.468533059@linutronix.de --- include/linux/highmem-internal.h | 48 ++++++++++++++++++++++++++++++++++++++++ include/linux/highmem.h | 43 +++++++++++++++++++++-------------- 2 files changed, 75 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index c5a22177db85..1bbe96dc8be6 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -68,6 +68,26 @@ static inline void kmap_flush_unused(void) __kmap_flush_unused(); } +static inline void *kmap_local_page(struct page *page) +{ + return __kmap_local_page_prot(page, kmap_prot); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_local(void *vaddr) +{ + kunmap_local_indexed(vaddr); +} + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); @@ -140,6 +160,28 @@ static inline void kunmap(struct page *page) #endif } +static inline void *kmap_local_page(struct page *page) +{ + return page_address(page); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return kmap_local_page(page); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return kmap_local_page(pfn_to_page(pfn)); +} + +static inline void __kunmap_local(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif +} + static inline void *kmap_atomic(struct page *page) { preempt_disable(); @@ -181,4 +223,10 @@ do { \ __kunmap_atomic(__addr); \ } while (0) +#define kunmap_local(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_local(__addr); \ +} while (0) + #endif diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7d098bd621f6..f597830f26b4 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -60,24 +60,22 @@ static inline struct page *kmap_to_page(void *addr); static inline void kmap_flush_unused(void); /** - * kmap_atomic - Atomically map a page for temporary usage + * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * - * Side effect: On return pagefaults and preemption are disabled. - * * Can be invoked from any context. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * - * addr1 = kmap_atomic(page1); - * addr2 = kmap_atomic(page2); + * addr1 = kmap_local_page(page1); + * addr2 = kmap_local_page(page2); * ... - * kunmap_atomic(addr2); - * kunmap_atomic(addr1); + * kunmap_local(addr2); + * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * @@ -88,10 +86,26 @@ static inline void kmap_flush_unused(void); * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() it comes with restrictions - * about the pointer validity and the side effects of disabling page faults - * and preemption. Use it only when absolutely necessary, e.g. from non - * preemptible contexts. + * While it is significantly faster than kmap() for the higmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_page() can rely on this side effect. + */ +static inline void *kmap_local_page(struct page *page); + +/** + * kmap_atomic - Atomically map a page for temporary usage - Deprecated! + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Effectively a wrapper around kmap_local_page() which disables pagefaults + * and preemption. + * + * Do not use in new code. Use kmap_local_page() instead. */ static inline void *kmap_atomic(struct page *page); @@ -101,12 +115,9 @@ static inline void *kmap_atomic(struct page *page); * * Counterpart to kmap_atomic(). * - * Undoes the side effects of kmap_atomic(), i.e. reenabling pagefaults and + * Effectively a wrapper around kunmap_local() which additionally undoes + * the side effects of kmap_atomic(), i.e. reenabling pagefaults and * preemption. - * - * Other than that a NOOP for CONFIG_HIGHMEM=n and for mappings of pages - * in the low memory area. For real highmen pages the mapping which was - * established with kmap_atomic() is destroyed. */ /* Highmem related interfaces for management code */ -- cgit v1.2.3 From e66f6e095486f0210fcf3c5eb3ecf13fa348be4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:45 +0100 Subject: io-mapping: Provide iomap_local variant Similar to kmap local provide a iomap local variant which only disables migration, but neither disables pagefaults nor preemption. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.561220818@linutronix.de --- include/linux/io-mapping.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 60e7c83e4904..c093e81310a9 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -82,6 +82,21 @@ io_mapping_unmap_atomic(void __iomem *vaddr) preempt_enable(); } +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + resource_size_t phys_addr; + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); +} + +static inline void io_mapping_unmap_local(void __iomem *vaddr) +{ + kunmap_local_indexed((void __force *)vaddr); +} + static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset, @@ -101,7 +116,7 @@ io_mapping_unmap(void __iomem *vaddr) iounmap(vaddr); } -#else +#else /* HAVE_ATOMIC_IOMAP */ #include @@ -166,7 +181,18 @@ io_mapping_unmap_atomic(void __iomem *vaddr) preempt_enable(); } -#endif /* HAVE_ATOMIC_IOMAP */ +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); +} + +static inline void io_mapping_unmap_local(void __iomem *vaddr) +{ + io_mapping_unmap(vaddr); +} + +#endif /* !HAVE_ATOMIC_IOMAP */ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, -- cgit v1.2.3 From acfdd18591eaac25446e976a0c0d190f8b3dbfb1 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 23 Nov 2020 21:52:41 -0800 Subject: firmware: xilinx: Use hash-table for api feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently array of fix length PM_API_MAX is used to cache the pm_api version (valid or invalid). However ATF based PM APIs values are much higher then PM_API_MAX. So to include ATF based PM APIs also, use hash-table to store the pm_api version status. Signed-off-by: Amit Sunil Dhamne Reported-by: Arnd Bergmann  Signed-off-by: Ravi Patel Signed-off-by: Rajan Vaja Reviewed-by: Arnd Bergmann Tested-by: Michal Simek Fixes: f3217d6f2f7a ("firmware: xilinx: fix out-of-bounds access") Cc: stable Link: https://lore.kernel.org/r/1606197161-25976-1-git-send-email-rajan.vaja@xilinx.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5968df82b991..41a1bab98b7e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -50,10 +50,6 @@ #define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U #define ZYNQMP_PM_CAPABILITY_UNUSABLE 0x8U -/* Feature check status */ -#define PM_FEATURE_INVALID -1 -#define PM_FEATURE_UNCHECKED 0 - /* * Firmware FPGA Manager flags * XILINX_ZYNQMP_PM_FPGA_FULL: FPGA full reconfiguration -- cgit v1.2.3 From 7a9f50a05843fee8366bd3a65addbebaa7cf7f07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 11:51:29 +0200 Subject: irq_work: Cleanup Get rid of the __call_single_node union and clean up the API a little to avoid external code relying on the structure layout as much. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/irq_work.h | 33 +++++++++++++++++++++------------ include/linux/irqflags.h | 4 ++-- 2 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 30823780c192..ec2a47a81e42 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -14,28 +14,37 @@ */ struct irq_work { - union { - struct __call_single_node node; - struct { - struct llist_node llnode; - atomic_t flags; - }; - }; + struct __call_single_node node; void (*func)(struct irq_work *); }; +#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \ + .node = { .u_flags = (_flags), }, \ + .func = (_func), \ +} + +#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0) +#define IRQ_WORK_INIT_LAZY(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_LAZY) +#define IRQ_WORK_INIT_HARD(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_HARD_IRQ) + +#define DEFINE_IRQ_WORK(name, _f) \ + struct irq_work name = IRQ_WORK_INIT(_f) + static inline void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - atomic_set(&work->flags, 0); - work->func = func; + *work = IRQ_WORK_INIT(func); } -#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { \ - .flags = ATOMIC_INIT(0), \ - .func = (_f) \ +static inline bool irq_work_is_pending(struct irq_work *work) +{ + return atomic_read(&work->node.a_flags) & IRQ_WORK_PENDING; } +static inline bool irq_work_is_busy(struct irq_work *work) +{ + return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY; +} bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3ed4e8771b64..fef2d43a7a1d 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -109,12 +109,12 @@ do { \ # define lockdep_irq_work_enter(__work) \ do { \ - if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ current->irq_config = 1; \ } while (0) # define lockdep_irq_work_exit(__work) \ do { \ - if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ current->irq_config = 0; \ } while (0) -- cgit v1.2.3 From 545b8c8df41f9ecbaf806332d4095bc4bc7c14e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 11:29:31 +0200 Subject: smp: Cleanup smp_call_function*() Get rid of the __call_single_node union and cleanup the API a little to avoid external code relying on the structure layout as much. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/smp.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 9f13966d3d92..70c6f6284dcf 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -21,24 +21,23 @@ typedef bool (*smp_cond_func_t)(int cpu, void *info); * structure shares (partial) layout with struct irq_work */ struct __call_single_data { - union { - struct __call_single_node node; - struct { - struct llist_node llist; - unsigned int flags; -#ifdef CONFIG_64BIT - u16 src, dst; -#endif - }; - }; + struct __call_single_node node; smp_call_func_t func; void *info; }; +#define CSD_INIT(_func, _info) \ + (struct __call_single_data){ .func = (_func), .info = (_info), } + /* Use __aligned() to avoid to use 2 cache lines for 1 csd */ typedef struct __call_single_data call_single_data_t __aligned(sizeof(struct __call_single_data)); +#define INIT_CSD(_csd, _func, _info) \ +do { \ + *(_csd) = CSD_INIT((_func), (_info)); \ +} while (0) + /* * Enqueue a llist_node on the call_single_queue; be very careful, read * flush_smp_call_function_queue() in detail. -- cgit v1.2.3 From 2914b0ba61a9d253535e51af16c7122a8148995d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2020 22:28:37 +0200 Subject: irq_work: Optimize irq_work_single() Trade one atomic op for a full memory barrier. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/irqflags.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index fef2d43a7a1d..8de0e1373de7 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -107,14 +107,14 @@ do { \ current->irq_config = 0; \ } while (0) -# define lockdep_irq_work_enter(__work) \ +# define lockdep_irq_work_enter(_flags) \ do { \ - if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ + if (!((_flags) & IRQ_WORK_HARD_IRQ)) \ current->irq_config = 1; \ } while (0) -# define lockdep_irq_work_exit(__work) \ +# define lockdep_irq_work_exit(_flags) \ do { \ - if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ + if (!((_flags) & IRQ_WORK_HARD_IRQ)) \ current->irq_config = 0; \ } while (0) -- cgit v1.2.3 From 6bef038011a023db41f1b33f0776224729d52344 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 Nov 2020 14:42:38 -0700 Subject: rpmsg: Introduce __rpmsg{16|32|64} types Introduce __rpmsg{16|32|64} types along with byte order conversion functions based on an rpmsg_device operation as a foundation to make RPMSG modular and transport agnostic. Tested-by: Guennadi Liakhovetski Suggested-by: Guennadi Liakhovetski Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201120214245.172963-2-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 51 +++++++++++++++++++++++++++++++ include/linux/rpmsg/byteorder.h | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 include/linux/rpmsg/byteorder.h (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 9fe156d1c018..faf2daff6238 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -17,6 +17,7 @@ #include #include #include +#include #define RPMSG_ADDR_ANY 0xFFFFFFFF @@ -46,6 +47,7 @@ struct rpmsg_channel_info { * @dst: destination address * @ept: the rpmsg endpoint of this channel * @announce: if set, rpmsg will announce the creation/removal of this channel + * @little_endian: True if transport is using little endian byte representation */ struct rpmsg_device { struct device dev; @@ -55,6 +57,7 @@ struct rpmsg_device { u32 dst; struct rpmsg_endpoint *ept; bool announce; + bool little_endian; const struct rpmsg_device_ops *ops; }; @@ -111,6 +114,54 @@ struct rpmsg_driver { int (*callback)(struct rpmsg_device *, void *, int, void *, u32); }; +static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val) +{ + if (!rpdev) + return __rpmsg16_to_cpu(rpmsg_is_little_endian(), val); + else + return __rpmsg16_to_cpu(rpdev->little_endian, val); +} + +static inline __rpmsg16 cpu_to_rpmsg16(struct rpmsg_device *rpdev, u16 val) +{ + if (!rpdev) + return __cpu_to_rpmsg16(rpmsg_is_little_endian(), val); + else + return __cpu_to_rpmsg16(rpdev->little_endian, val); +} + +static inline u32 rpmsg32_to_cpu(struct rpmsg_device *rpdev, __rpmsg32 val) +{ + if (!rpdev) + return __rpmsg32_to_cpu(rpmsg_is_little_endian(), val); + else + return __rpmsg32_to_cpu(rpdev->little_endian, val); +} + +static inline __rpmsg32 cpu_to_rpmsg32(struct rpmsg_device *rpdev, u32 val) +{ + if (!rpdev) + return __cpu_to_rpmsg32(rpmsg_is_little_endian(), val); + else + return __cpu_to_rpmsg32(rpdev->little_endian, val); +} + +static inline u64 rpmsg64_to_cpu(struct rpmsg_device *rpdev, __rpmsg64 val) +{ + if (!rpdev) + return __rpmsg64_to_cpu(rpmsg_is_little_endian(), val); + else + return __rpmsg64_to_cpu(rpdev->little_endian, val); +} + +static inline __rpmsg64 cpu_to_rpmsg64(struct rpmsg_device *rpdev, u64 val) +{ + if (!rpdev) + return __cpu_to_rpmsg64(rpmsg_is_little_endian(), val); + else + return __cpu_to_rpmsg64(rpdev->little_endian, val); +} + #if IS_ENABLED(CONFIG_RPMSG) int register_rpmsg_device(struct rpmsg_device *dev); diff --git a/include/linux/rpmsg/byteorder.h b/include/linux/rpmsg/byteorder.h new file mode 100644 index 000000000000..c0f565dbad6d --- /dev/null +++ b/include/linux/rpmsg/byteorder.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Follows implementation found in linux/virtio_byteorder.h + */ +#ifndef _LINUX_RPMSG_BYTEORDER_H +#define _LINUX_RPMSG_BYTEORDER_H +#include +#include + +static inline bool rpmsg_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} + +static inline u16 __rpmsg16_to_cpu(bool little_endian, __rpmsg16 val) +{ + if (little_endian) + return le16_to_cpu((__force __le16)val); + else + return be16_to_cpu((__force __be16)val); +} + +static inline __rpmsg16 __cpu_to_rpmsg16(bool little_endian, u16 val) +{ + if (little_endian) + return (__force __rpmsg16)cpu_to_le16(val); + else + return (__force __rpmsg16)cpu_to_be16(val); +} + +static inline u32 __rpmsg32_to_cpu(bool little_endian, __rpmsg32 val) +{ + if (little_endian) + return le32_to_cpu((__force __le32)val); + else + return be32_to_cpu((__force __be32)val); +} + +static inline __rpmsg32 __cpu_to_rpmsg32(bool little_endian, u32 val) +{ + if (little_endian) + return (__force __rpmsg32)cpu_to_le32(val); + else + return (__force __rpmsg32)cpu_to_be32(val); +} + +static inline u64 __rpmsg64_to_cpu(bool little_endian, __rpmsg64 val) +{ + if (little_endian) + return le64_to_cpu((__force __le64)val); + else + return be64_to_cpu((__force __be64)val); +} + +static inline __rpmsg64 __cpu_to_rpmsg64(bool little_endian, u64 val) +{ + if (little_endian) + return (__force __rpmsg64)cpu_to_le64(val); + else + return (__force __rpmsg64)cpu_to_be64(val); +} + +#endif /* _LINUX_RPMSG_BYTEORDER_H */ -- cgit v1.2.3 From c435a04189de372ba9ae72076b18185a884108d6 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 Nov 2020 14:42:40 -0700 Subject: rpmsg: Move structure rpmsg_ns_msg to header file Move structure rpmsg_ns_msg to its own header file so that it can be used by other entities. Tested-by: Guennadi Liakhovetski Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201120214245.172963-4-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg/ns.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/rpmsg/ns.h (limited to 'include/linux') diff --git a/include/linux/rpmsg/ns.h b/include/linux/rpmsg/ns.h new file mode 100644 index 000000000000..73ecc91dc26f --- /dev/null +++ b/include/linux/rpmsg/ns.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_RPMSG_NS_H +#define _LINUX_RPMSG_NS_H + +#include +#include +#include + +/** + * struct rpmsg_ns_msg - dynamic name service announcement message + * @name: name of remote service that is published + * @addr: address of remote service that is published + * @flags: indicates whether service is created or destroyed + * + * This message is sent across to publish a new service, or announce + * about its removal. When we receive these messages, an appropriate + * rpmsg channel (i.e device) is created/destroyed. In turn, the ->probe() + * or ->remove() handler of the appropriate rpmsg driver will be invoked + * (if/as-soon-as one is registered). + */ +struct rpmsg_ns_msg { + char name[RPMSG_NAME_SIZE]; + __rpmsg32 addr; + __rpmsg32 flags; +} __packed; + +/** + * enum rpmsg_ns_flags - dynamic name service announcement flags + * + * @RPMSG_NS_CREATE: a new remote service was just created + * @RPMSG_NS_DESTROY: a known remote service was just destroyed + */ +enum rpmsg_ns_flags { + RPMSG_NS_CREATE = 0, + RPMSG_NS_DESTROY = 1, +}; + +/* Address 53 is reserved for advertising remote services */ +#define RPMSG_NS_ADDR (53) + +#endif -- cgit v1.2.3 From 55488110acc1560b4599e3d509b13f2731a6fee1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 Nov 2020 14:42:44 -0700 Subject: rpmsg: Make rpmsg_{register|unregister}_device() public Make function rpmsg_register_device() and rpmsg_unregister_device() functions public so that they can be used by other clients. While doing so get rid of two obsolete function, i.e register_rpmsg_device() and unregister_rpmsg_device(), to prevent confusion. Tested-by: Guennadi Liakhovetski Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201120214245.172963-8-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index faf2daff6238..a5db828b2420 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -164,8 +164,9 @@ static inline __rpmsg64 cpu_to_rpmsg64(struct rpmsg_device *rpdev, u64 val) #if IS_ENABLED(CONFIG_RPMSG) -int register_rpmsg_device(struct rpmsg_device *dev); -void unregister_rpmsg_device(struct rpmsg_device *dev); +int rpmsg_register_device(struct rpmsg_device *rpdev); +int rpmsg_unregister_device(struct device *parent, + struct rpmsg_channel_info *chinfo); int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); @@ -188,15 +189,18 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, #else -static inline int register_rpmsg_device(struct rpmsg_device *dev) +static inline int rpmsg_register_device(struct rpmsg_device *rpdev) { return -ENXIO; } -static inline void unregister_rpmsg_device(struct rpmsg_device *dev) +static inline int rpmsg_unregister_device(struct device *parent, + struct rpmsg_channel_info *chinfo) { /* This shouldn't be possible */ WARN_ON(1); + + return -ENXIO; } static inline int __register_rpmsg_driver(struct rpmsg_driver *drv, -- cgit v1.2.3 From 950a7388f02bf775515d13dc508cb9d749bd6d91 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Fri, 20 Nov 2020 14:42:45 -0700 Subject: rpmsg: Turn name service into a stand alone driver Make the RPMSG name service announcement a stand alone driver so that it can be reused by other subsystems. It is also the first step in making the functionatlity transport independent, i.e that is not tied to virtIO. Reviewed-by: Guennadi Liakhovetski Tested-by: Guennadi Liakhovetski Co-developed-by: Mathieu Poirier Signed-off-by: Mathieu Poirier Co-developed-by: Guennadi Liakhovetski Signed-off-by: Guennadi Liakhovetski Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20201120214245.172963-9-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg/ns.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg/ns.h b/include/linux/rpmsg/ns.h index 73ecc91dc26f..a7804edd6d58 100644 --- a/include/linux/rpmsg/ns.h +++ b/include/linux/rpmsg/ns.h @@ -4,6 +4,7 @@ #define _LINUX_RPMSG_NS_H #include +#include #include #include @@ -39,4 +40,6 @@ enum rpmsg_ns_flags { /* Address 53 is reserved for advertising remote services */ #define RPMSG_NS_ADDR (53) +int rpmsg_ns_register_device(struct rpmsg_device *rpdev); + #endif -- cgit v1.2.3 From e7bbb7acabf47d74672e0e314bed4d452d2097b4 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 9 Nov 2020 14:24:49 +0530 Subject: dmaengine: add peripheral configuration Some complex dmaengine controllers have capability to program the peripheral device, so pass on the peripheral configuration as part of dma_slave_config Link: https://lore.kernel.org/r/20201109085450.24843-3-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index dd357a747780..493a047ed0a2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -418,6 +418,9 @@ enum dma_slave_buswidth { * @slave_id: Slave requester id. Only valid for slave channels. The dma * slave peripheral will have unique id as dma requester which need to be * pass as slave config. + * @peripheral_config: peripheral configuration for programming peripheral + * for dmaengine transfer + * @peripheral_size: peripheral configuration buffer size * * This struct is passed in as configuration data to a DMA engine * in order to set up a certain channel for DMA transport at runtime. @@ -443,6 +446,8 @@ struct dma_slave_config { u32 dst_port_window_size; bool device_fc; unsigned int slave_id; + void *peripheral_config; + size_t peripheral_size; }; /** -- cgit v1.2.3 From 5d0c3533a19f48e5e7e73806a3e4b29cd4364130 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 9 Nov 2020 14:24:50 +0530 Subject: dmaengine: qcom: Add GPI dma driver This controller provides DMAengine capabilities for a variety of peripheral buses such as I2C, UART, and SPI. By using GPI dmaengine driver, bus drivers can use a standardize interface that is protocol independent to transfer data between memory and peripheral. Link: https://lore.kernel.org/r/20201109085450.24843-4-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/dma/qcom-gpi-dma.h | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 include/linux/dma/qcom-gpi-dma.h (limited to 'include/linux') diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h new file mode 100644 index 000000000000..f46dc3372f11 --- /dev/null +++ b/include/linux/dma/qcom-gpi-dma.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Linaro Limited + */ + +#ifndef QCOM_GPI_DMA_H +#define QCOM_GPI_DMA_H + +/** + * enum spi_transfer_cmd - spi transfer commands + */ +enum spi_transfer_cmd { + SPI_TX = 1, + SPI_RX, + SPI_DUPLEX, +}; + +/** + * struct gpi_spi_config - spi config for peripheral + * + * @loopback_en: spi loopback enable when set + * @clock_pol_high: clock polarity + * @data_pol_high: data polarity + * @pack_en: process tx/rx buffers as packed + * @word_len: spi word length + * @clk_div: source clock divider + * @clk_src: serial clock + * @cmd: spi cmd + * @fragmentation: keep CS assserted at end of sequence + * @cs: chip select toggle + * @set_config: set peripheral config + * @rx_len: receive length for buffer + */ +struct gpi_spi_config { + u8 set_config; + u8 loopback_en; + u8 clock_pol_high; + u8 data_pol_high; + u8 pack_en; + u8 word_len; + u8 fragmentation; + u8 cs; + u32 clk_div; + u32 clk_src; + enum spi_transfer_cmd cmd; + u32 rx_len; +}; + +enum i2c_op { + I2C_WRITE = 1, + I2C_READ, +}; + +/** + * struct gpi_i2c_config - i2c config for peripheral + * + * @pack_enable: process tx/rx buffers as packed + * @cycle_count: clock cycles to be sent + * @high_count: high period of clock + * @low_count: low period of clock + * @clk_div: source clock divider + * @addr: i2c bus address + * @stretch: stretch the clock at eot + * @set_config: set peripheral config + * @rx_len: receive length for buffer + * @op: i2c cmd + * @muli-msg: is part of multi i2c r-w msgs + */ +struct gpi_i2c_config { + u8 set_config; + u8 pack_enable; + u8 cycle_count; + u8 high_count; + u8 low_count; + u8 addr; + u8 stretch; + u16 clk_div; + u32 rx_len; + enum i2c_op op; + bool multi_msg; +}; + +#endif /* QCOM_GPI_DMA_H */ -- cgit v1.2.3 From 36ccdf85829a7dd6936dba5d02fa50138471f0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 23 Nov 2020 18:56:00 +0100 Subject: net, xsk: Avoid taking multiple skbuff references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") addressed the problem that packets were discarded from the Tx AF_XDP ring, when the driver returned NETDEV_TX_BUSY. Part of the fix was bumping the skbuff reference count, so that the buffer would not be freed by dev_direct_xmit(). A reference count larger than one means that the skbuff is "shared", which is not the case. If the "shared" skbuff is sent to the generic XDP receive path, netif_receive_generic_xdp(), and pskb_expand_head() is entered the BUG_ON(skb_shared(skb)) will trigger. This patch adds a variant to dev_direct_xmit(), __dev_direct_xmit(), where a user can select the skbuff free policy. This allows AF_XDP to avoid bumping the reference count, but still keep the NETDEV_TX_BUSY behavior. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Yonghong Song Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201123175600.146255-1-bjorn.topel@gmail.com --- include/linux/netdevice.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..76775abf259d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2813,9 +2813,21 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); + int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); + +static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +{ + int ret; + + ret = __dev_direct_xmit(skb, queue_id); + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + return ret; +} + int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); -- cgit v1.2.3 From 159e1de201b6fca10bfec50405a3b53a561096a8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Nov 2020 23:56:05 -0800 Subject: fscrypt: add fscrypt_is_nokey_name() It's possible to create a duplicate filename in an encrypted directory by creating a file concurrently with adding the encryption key. Specifically, sys_open(O_CREAT) (or sys_mkdir(), sys_mknod(), or sys_symlink()) can lookup the target filename while the directory's encryption key hasn't been added yet, resulting in a negative no-key dentry. The VFS then calls ->create() (or ->mkdir(), ->mknod(), or ->symlink()) because the dentry is negative. Normally, ->create() would return -ENOKEY due to the directory's key being unavailable. However, if the key was added between the dentry lookup and ->create(), then the filesystem will go ahead and try to create the file. If the target filename happens to already exist as a normal name (not a no-key name), a duplicate filename may be added to the directory. In order to fix this, we need to fix the filesystems to prevent ->create(), ->mkdir(), ->mknod(), and ->symlink() on no-key names. (->rename() and ->link() need it too, but those are already handled correctly by fscrypt_prepare_rename() and fscrypt_prepare_link().) In preparation for this, add a helper function fscrypt_is_nokey_name() that filesystems can use to do this check. Use this helper function for the existing checks that fs/crypto/ does for rename and link. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201118075609.120337-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a8f7a43f031b..8e1d31c959bf 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -111,6 +111,35 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) dentry->d_flags &= ~DCACHE_NOKEY_NAME; } +/** + * fscrypt_is_nokey_name() - test whether a dentry is a no-key name + * @dentry: the dentry to check + * + * This returns true if the dentry is a no-key dentry. A no-key dentry is a + * dentry that was created in an encrypted directory that hasn't had its + * encryption key added yet. Such dentries may be either positive or negative. + * + * When a filesystem is asked to create a new filename in an encrypted directory + * and the new filename's dentry is a no-key dentry, it must fail the operation + * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(), + * ->rename(), and ->link(). (However, ->rename() and ->link() are already + * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().) + * + * This is necessary because creating a filename requires the directory's + * encryption key, but just checking for the key on the directory inode during + * the final filesystem operation doesn't guarantee that the key was available + * during the preceding dentry lookup. And the key must have already been + * available during the dentry lookup in order for it to have been checked + * whether the filename already exists in the directory and for the new file's + * dentry not to be invalidated due to it incorrectly having the no-key flag. + * + * Return: %true if the dentry is a no-key name + */ +static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_NOKEY_NAME; +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -244,6 +273,11 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) { } +static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) +{ + return false; +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { -- cgit v1.2.3 From 234f1b7f8daf112655c87f75ae8932564f871225 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Nov 2020 23:56:09 -0800 Subject: fscrypt: remove unnecessary calls to fscrypt_require_key() In an encrypted directory, a regular dentry (one that doesn't have the no-key name flag) can only be created if the directory's encryption key is available. Therefore the calls to fscrypt_require_key() in __fscrypt_prepare_link() and __fscrypt_prepare_rename() are unnecessary, as these functions already check that the dentries they're given aren't no-key names. Remove these unnecessary calls to fscrypt_require_key(). Link: https://lore.kernel.org/r/20201118075609.120337-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8e1d31c959bf..0c9e64969b73 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -710,8 +710,7 @@ static inline int fscrypt_require_key(struct inode *inode) * * A new link can only be added to an encrypted directory if the directory's * encryption key is available --- since otherwise we'd have no way to encrypt - * the filename. Therefore, we first set up the directory's encryption key (if - * not already done) and return an error if it's unavailable. + * the filename. * * We also verify that the link will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. -- cgit v1.2.3 From 5903f61e035320104394f721f74cd22171636f63 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 23 Nov 2020 10:54:58 -0500 Subject: entry: Fix boot for !CONFIG_GENERIC_ENTRY A copy-pasta mistake tries to set SYSCALL_WORK flags instead of TIF flags for !CONFIG_GENERIC_ENTRY. Also, add safeguards to catch this at compilation time. Fixes: 3136b93c3fb2 ("entry: Expose helpers to migrate TIF to SYSCALL_WORK flags") Reported-by: Naresh Kamboju Suggested-by: Jann Horn Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/87a6v8qd9p.fsf_-_@collabora.com --- include/linux/thread_info.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 317363212ae9..ca80a214df09 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -35,6 +35,7 @@ enum { GOOD_STACK, }; +#ifdef CONFIG_GENERIC_ENTRY enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, @@ -48,6 +49,7 @@ enum syscall_work_bit { #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) +#endif #include @@ -129,11 +131,11 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #else /* CONFIG_GENERIC_ENTRY */ #define set_syscall_work(fl) \ - set_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + set_ti_thread_flag(current_thread_info(), TIF_##fl) #define test_syscall_work(fl) \ - test_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + test_ti_thread_flag(current_thread_info(), TIF_##fl) #define clear_syscall_work(fl) \ - clear_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + clear_ti_thread_flag(current_thread_info(), TIF_##fl) #define set_task_syscall_work(t, fl) \ set_ti_thread_flag(task_thread_info(t), TIF_##fl) -- cgit v1.2.3 From 2fb94784952e4b290c392b74c2c67b4afa672523 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 24 Nov 2020 09:33:16 +0800 Subject: soundwire: registers: add definitions for clearable interrupt fields DP0 has reserved fields and the read-only SDCA_CASCADE bit. We should not try to write values in these fields, so add a formal definition for clearable interrupts to be used in DP0 interrupt handling. DPN also has reserved fields so add definitions for clearable interrupts as well. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20201124013318.8963-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_registers.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index f420e8059779..0cb1a22685b8 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -41,6 +41,12 @@ #define SDW_DP0_INT_IMPDEF1 BIT(5) #define SDW_DP0_INT_IMPDEF2 BIT(6) #define SDW_DP0_INT_IMPDEF3 BIT(7) +#define SDW_DP0_INTERRUPTS (SDW_DP0_INT_TEST_FAIL | \ + SDW_DP0_INT_PORT_READY | \ + SDW_DP0_INT_BRA_FAILURE | \ + SDW_DP0_INT_IMPDEF1 | \ + SDW_DP0_INT_IMPDEF2 | \ + SDW_DP0_INT_IMPDEF3) #define SDW_DP0_PORTCTRL_DATAMODE GENMASK(3, 2) #define SDW_DP0_PORTCTRL_NXTINVBANK BIT(4) @@ -241,6 +247,11 @@ #define SDW_DPN_INT_IMPDEF1 BIT(5) #define SDW_DPN_INT_IMPDEF2 BIT(6) #define SDW_DPN_INT_IMPDEF3 BIT(7) +#define SDW_DPN_INTERRUPTS (SDW_DPN_INT_TEST_FAIL | \ + SDW_DPN_INT_PORT_READY | \ + SDW_DPN_INT_IMPDEF1 | \ + SDW_DPN_INT_IMPDEF2 | \ + SDW_DPN_INT_IMPDEF3) #define SDW_DPN_PORTCTRL_FLOWMODE GENMASK(1, 0) #define SDW_DPN_PORTCTRL_DATAMODE GENMASK(3, 2) -- cgit v1.2.3 From 2a2b8eaa5b25668a6f717f94b55f4e3aaf87629d Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Tue, 24 Nov 2020 16:20:51 +0800 Subject: iommu: Handle freelists when using deferred flushing in iommu drivers Allow the iommu_unmap_fast to return newly freed page table pages and pass the freelist to queue_iova in the dma-iommu ops path. This is useful for iommu drivers (in this case the intel iommu driver) which need to wait for the ioTLB to be flushed before newly free/unmapped page table pages can be freed. This way we can still batch ioTLB free operations and handle the freelists. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu Tested-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20201124082057.2614359-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b95a6f8db6ff..e56bae327e35 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -180,6 +180,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; + struct page *freelist; }; /** -- cgit v1.2.3 From 230309d08b871e439f8618db3610f2cc9b5f7c72 Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Tue, 24 Nov 2020 16:20:52 +0800 Subject: iommu: Add iommu_dma_free_cpu_cached_iovas() Add a iommu_dma_free_cpu_cached_iovas function to allow drivers which use the dma-iommu ops to free cached cpu iovas. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu Tested-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20201124082057.2614359-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/dma-iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 2112f21f73d8..706b68d1359b 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain); + #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; @@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ -- cgit v1.2.3 From a7656ecf825ac0434a5e7bf108ec1a56b65ee5e4 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 25 Nov 2020 12:30:10 +0530 Subject: iommu/io-pgtable: Add a domain attribute for pagetable configuration Add a new iommu domain attribute DOMAIN_ATTR_IO_PGTABLE_CFG for pagetable configuration which initially will be used to set quirks like for system cache aka last level cache to be used by client drivers like GPU to set right attributes for caching the hardware pagetables into the system cache and later can be extended to include other page table configuration data. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/9190aa16f378fc0a7f8e57b2b9f60b033e7eeb4f.1606287059.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 4 ++++ include/linux/iommu.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 4cde111e425b..215fd9d69540 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -208,6 +208,10 @@ struct io_pgtable { #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) +struct io_pgtable_domain_attr { + unsigned long quirks; +}; + static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { iop->cfg.tlb->tlb_flush_all(iop->cookie); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b95a6f8db6ff..ffaa389ea128 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -118,6 +118,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + DOMAIN_ATTR_IO_PGTABLE_CFG, DOMAIN_ATTR_MAX, }; -- cgit v1.2.3 From e67890c97944b9962cf8c140a7f8077ed643b7d7 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 25 Nov 2020 12:30:11 +0530 Subject: iommu/io-pgtable-arm: Add support to use system cache Add a quirk IO_PGTABLE_QUIRK_ARM_OUTER_WBWA to override the outer-cacheability attributes set in the TCR for a non-coherent page table walker when using system cache. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/f818676b4a2a9ad1edb92721947d47db41ed6a7c.1606287059.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 215fd9d69540..fb4d5a763e0c 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -86,6 +86,9 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. + * + * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability + * attributes set in the TCR for a non-coherent page-table walker. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -93,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) + #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From 0801a0073f86e020987acbbd96b50f9c85d79de8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:14 +0100 Subject: module: drop version-attribute alignment Commit 98562ad8cb03 ("module: explicitly align module_version_attribute structure") added an alignment attribute to the struct module_version_attribute type in order to fix an alignment issue on m68k where the structure is 2-byte aligned while MODULE_VERSION() forced the __modver section entries to be 4-byte aligned (sizeof(void *)). This was essentially an alternative fix to the problem addressed by b4bc842802db ("module: deal with alignment issues in built-in module versions") which used the array-of-pointer trick to prevent gcc from increasing alignment of the version attribute entries. And with the pointer indirection in place there's no need to increase the alignment of the type. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7ccdf87f376f..293250958512 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -66,7 +66,7 @@ struct module_version_attribute { struct module_attribute mattr; const char *module_name; const char *version; -} __attribute__ ((__aligned__(sizeof(void *)))); +}; extern ssize_t __modver_version_show(struct module_attribute *, struct module_kobject *, char *); -- cgit v1.2.3 From b112082c8930e7aa72422484b2d31d3aa06f58bc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:15 +0100 Subject: module: simplify version-attribute handling Instead of using the array-of-pointers trick to avoid having gcc mess up the built-in module-version array stride, specify type alignment when declaring entries to prevent gcc from increasing alignment. This is essentially an alternative (one-line) fix to the problem addressed by commit b4bc842802db ("module: deal with alignment issues in built-in module versions"). gcc can increase the alignment of larger objects with static extent as an optimisation, but this can be suppressed by using the aligned attribute when declaring variables. Note that we have been relying on this behaviour for kernel parameters for 16 years and it indeed hasn't changed since the introduction of the aligned attribute in gcc-3.1. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/module.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 293250958512..5958075ea3f4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -266,20 +266,20 @@ extern typeof(name) __mod_##type##__##name##_device_table \ #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ - static struct module_version_attribute ___modver_attr = { \ - .mattr = { \ - .attr = { \ - .name = "version", \ - .mode = S_IRUGO, \ + static struct module_version_attribute __modver_attr \ + __used __section("__modver") \ + __aligned(__alignof__(struct module_version_attribute)) \ + = { \ + .mattr = { \ + .attr = { \ + .name = "version", \ + .mode = S_IRUGO, \ + }, \ + .show = __modver_version_show, \ }, \ - .show = __modver_version_show, \ - }, \ - .module_name = KBUILD_MODNAME, \ - .version = _version, \ - }; \ - static const struct module_version_attribute \ - __used __section("__modver") \ - * __moduleparam_const __modver_attr = &___modver_attr + .module_name = KBUILD_MODNAME, \ + .version = _version, \ + }; #endif /* Optional firmware file (or files) needed by the module -- cgit v1.2.3 From 8d6615f1fccc4f39d7d3dcf286b33e8a1e833d2b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:17 +0100 Subject: params: drop redundant "unused" attributes Drop the redundant "unused" attributes from module-parameter structures already marked "used". Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 6388eb9734a5..742074ad9f6e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -22,7 +22,7 @@ #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ - __used __section(".modinfo") __attribute__((unused, aligned(1))) \ + __used __section(".modinfo") __attribute__((aligned(1))) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ @@ -289,7 +289,7 @@ struct kparam_array static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ - __section("__param") __attribute__ ((unused, aligned(sizeof(void *)))) \ + __section("__param") __attribute__ ((aligned(sizeof(void *)))) \ = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -- cgit v1.2.3 From fe2f4fe139b321a38daafc715aeb7d21d9e8e5ad Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:18 +0100 Subject: params: use type alignment for kernel parameters Specify type alignment for kernel parameters instead of sizeof(void *). The alignment attribute is used to prevent gcc from increasing the alignment of objects with static extent as an optimisation, something which would mess up the __param array stride. Using __alignof__(struct kernel_param) rather than sizeof(void *) is preferred since it better indicates why it is there and doesn't break should the type size or alignment change. Note that on m68k the alignment of struct kernel_param is actually two and that adding a 1- or 2-byte field to the 20-byte struct would cause a breakage with the current 4-byte alignment. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 742074ad9f6e..15ecc6cc3a3b 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -288,8 +288,8 @@ struct kparam_array /* Default value instead of permissions? */ \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ - __used \ - __section("__param") __attribute__ ((aligned(sizeof(void *)))) \ + __used __section("__param") \ + __aligned(__alignof__(struct kernel_param)) \ = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -- cgit v1.2.3 From 2aec389e19150ed3bf67ab708f2435563f76050f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:19 +0100 Subject: params: clean up module-param macros Clean up the module-param macros by adding some indentation and using the __aligned() macro to improve readability. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 15ecc6cc3a3b..eed280fae433 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -21,12 +21,12 @@ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) #define __MODULE_INFO(tag, name, info) \ -static const char __UNIQUE_ID(name)[] \ - __used __section(".modinfo") __attribute__((aligned(1))) \ - = __MODULE_INFO_PREFIX __stringify(tag) "=" info + static const char __UNIQUE_ID(name)[] \ + __used __section(".modinfo") __aligned(1) \ + = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + __MODULE_INFO(parmtype, name##type, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. */ -- cgit v1.2.3 From 367c820ef08082e68df8a3bc12e62393af21e4b5 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Wed, 7 Oct 2020 14:21:43 +0530 Subject: arm64: Enable perf events based hard lockup detector With the recent feature added to enable perf events to use pseudo NMIs as interrupts on platforms which support GICv3 or later, its now been possible to enable hard lockup detector (or NMI watchdog) on arm64 platforms. So enable corresponding support. One thing to note here is that normally lockup detector is initialized just after the early initcalls but PMU on arm64 comes up much later as device_initcall(). So we need to re-initialize lockup detection once PMU has been initialized. Signed-off-by: Sumit Garg Acked-by: Alexandru Elisei Link: https://lore.kernel.org/r/1602060704-10921-1-git-send-email-sumit.garg@linaro.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 505480217cf1..bf7966776c55 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,6 +163,8 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +bool arm_pmu_irq_is_nmi(void); + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- cgit v1.2.3 From 63653368c25ff0b1b1aaf045c97ea87bd8c16123 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 25 Nov 2020 14:58:17 +0800 Subject: block: remove unused BIO_SPLIT_ENTRIES Since commit 4b1faf931650 ("block: Kill bio_pair_split()"), there's no user of BIO_SPLIT_ENTRIES anymore. Signed-off-by: Jeffle Xu Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index c6d765382926..ecf67108f091 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -711,12 +711,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -/* - * a small number of entries is fine, not going to be performance critical. - * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ -- cgit v1.2.3 From 6527b938426f7fa66051273568d234b1fe01a15b Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 23 Nov 2020 17:38:17 +0200 Subject: net: phy: remove the .did_interrupt() and .ack_interrupt() callback Now that all the PHY drivers have been migrated to directly implement the generic .handle_interrupt() callback for a seamless support of shared IRQs and all the .config_inter() implementations clear any pending interrupts, we can safely remove the two callbacks. With this patch, phylib has a proper support for shared IRQs (and not just for multi-PHY devices. A PHY driver must implement both the .handle_interrupt() and .config_intr() callbacks for the IRQs to be actually used. Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8849a00a093f..381a95732b6a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -743,18 +743,11 @@ struct phy_driver { /** @read_status: Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); - /** @ack_interrupt: Clears any pending interrupts */ - int (*ack_interrupt)(struct phy_device *phydev); - - /** @config_intr: Enables or disables interrupts */ - int (*config_intr)(struct phy_device *phydev); - - /** - * @did_interrupt: Checks if the PHY generated an interrupt. - * For multi-PHY devices with shared PHY interrupt pin - * Set interrupt bits have to be cleared. + /** @config_intr: Enables or disables interrupts. + * It should also clear any pending interrupts prior to enabling the + * IRQs and after disabling them. */ - int (*did_interrupt)(struct phy_device *phydev); + int (*config_intr)(struct phy_device *phydev); /** @handle_interrupt: Override default interrupt handling */ irqreturn_t (*handle_interrupt)(struct phy_device *phydev); @@ -1487,10 +1480,6 @@ static inline int genphy_config_aneg(struct phy_device *phydev) return __genphy_config_aneg(phydev, false); } -static inline int genphy_no_ack_interrupt(struct phy_device *phydev) -{ - return 0; -} static inline int genphy_no_config_intr(struct phy_device *phydev) { return 0; -- cgit v1.2.3 From 403319be5de51167cd70ddf594b76c95e6d26844 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 24 Nov 2020 15:12:08 +0000 Subject: ima: Implement ima_inode_hash This is in preparation to add a helper for BPF LSM programs to use IMA hashes when attached to LSM hooks. There are LSM hooks like inode_unlink which do not have a struct file * argument and cannot use the existing ima_file_hash API. An inode based API is, therefore, useful in LSM based detections like an executable trying to delete itself which rely on the inode_unlink LSM hook. Moreover, the ima_file_hash function does nothing with the struct file pointer apart from calling file_inode on it and converting it to an inode. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Mimi Zohar Link: https://lore.kernel.org/bpf/20201124151210.1081188-2-kpsingh@chromium.org --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 8fa7bcfb2da2..7233a2751754 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -29,6 +29,7 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); +extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); #ifdef CONFIG_IMA_KEXEC @@ -115,6 +116,11 @@ static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) return -EOPNOTSUPP; } +static inline int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size) +{ + return -EOPNOTSUPP; +} + static inline void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) {} #endif /* CONFIG_IMA */ -- cgit v1.2.3 From 8b5536ad1216c47fb9b37ef2cd0cfa70d79d4645 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 24 Nov 2020 18:49:28 +0800 Subject: lockdep: Introduce in_softirq lockdep assert The current semantic for napi_consume_skb() is that caller need to provide non-zero budget when calling from NAPI context, and breaking this semantic will cause hard to debug problem, because _kfree_skb_defer() need to run in atomic context in order to push the skb to the particular cpu' napi_alloc_cache atomically. So add the lockdep_assert_in_softirq() to assert when the running context is not in_softirq, in_softirq means softirq is serving or BH is disabled, which has a ambiguous semantics due to the BH disabled confusion, so add a comment to emphasize that. And the softirq context can be interrupted by hard IRQ or NMI context, lockdep_assert_in_softirq() need to assert about hard IRQ or NMI context too. Suggested-by: Jakub Kicinski Signed-off-by: Yunsheng Lin Signed-off-by: Jakub Kicinski --- include/linux/lockdep.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f5594879175a..92771bc1791f 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -594,6 +594,16 @@ do { \ this_cpu_read(hardirqs_enabled))); \ } while (0) +/* + * Acceptable for protecting per-CPU resources accessed from BH. + * Much like in_softirq() - semantics are ambiguous, use carefully. + */ +#define lockdep_assert_in_softirq() \ +do { \ + WARN_ON_ONCE(__lockdep_enabled && \ + (!in_softirq() || in_irq() || in_nmi())); \ +} while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) @@ -605,6 +615,7 @@ do { \ # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) +# define lockdep_assert_in_softirq() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING -- cgit v1.2.3 From 4c1ad562d303526b5d9b49f5e0d72da13ef78dec Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 20 Nov 2020 21:20:42 -0600 Subject: remoteproc: Add a rproc_set_firmware() API A new API, rproc_set_firmware() is added to allow the remoteproc platform drivers and remoteproc client drivers to be able to configure a custom firmware name that is different from the default name used during remoteproc registration. This function is being introduced to provide a kernel-level equivalent of the current sysfs interface to remoteproc client drivers, and can only change firmwares when the remoteproc is offline. This allows some remoteproc drivers to choose different firmwares at runtime based on the functionality the remote processor is providing. The TI PRU Ethernet driver will be an example of such usage as it requires to use different firmwares for different supported protocols. Also, update the firmware_store() function used by the sysfs interface to reuse this function to avoid code duplication. Reviewed-by: Rishabh Bhatnagar Signed-off-by: Suman Anna Link: https://lore.kernel.org/r/20201121032042.6195-1-s-anna@ti.com Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 3fa3ba6498e8..e8ac041c64d9 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -653,6 +653,7 @@ rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, size_t len, int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); +int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, -- cgit v1.2.3 From 7656ca71b0ba04ae7437afe3d046e9134442678e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 25 Nov 2020 15:06:41 +0300 Subject: usb: pd: DFP product types USB Power Delivery Specification R3.0 introduced separate field for the DFP product type to the ID Header VDO. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201125120642.37156-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_vdo.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index 8c5cb5830754..8c08eeb9a74b 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -103,17 +103,25 @@ * -------------------- * <31> :: data capable as a USB host * <30> :: data capable as a USB device - * <29:27> :: product type + * <29:27> :: product type (UFP / Cable) * <26> :: modal operation supported (1b == yes) - * <25:16> :: Reserved, Shall be set to zero + * <25:16> :: product type (DFP) * <15:0> :: USB-IF assigned VID for this cable vendor */ #define IDH_PTYPE_UNDEF 0 #define IDH_PTYPE_HUB 1 #define IDH_PTYPE_PERIPH 2 +#define IDH_PTYPE_PSD 3 +#define IDH_PTYPE_AMA 5 + #define IDH_PTYPE_PCABLE 3 #define IDH_PTYPE_ACABLE 4 -#define IDH_PTYPE_AMA 5 + +#define IDH_PTYPE_DFP_UNDEF 0 +#define IDH_PTYPE_DFP_HUB 1 +#define IDH_PTYPE_DFP_HOST 2 +#define IDH_PTYPE_DFP_PB 3 +#define IDH_PTYPE_DFP_AMC 4 #define VDO_IDH(usbh, usbd, ptype, is_modal, vid) \ ((usbh) << 31 | (usbd) << 30 | ((ptype) & 0x7) << 27 \ @@ -122,6 +130,7 @@ #define PD_IDH_PTYPE(vdo) (((vdo) >> 27) & 0x7) #define PD_IDH_VID(vdo) ((vdo) & 0xffff) #define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26)) +#define PD_IDH_DFP_PTYPE(vdo) (((vdo) >> 23) & 0x7) /* * Cert Stat VDO -- cgit v1.2.3 From 640586f8af356096e084d69a9909d217852bde48 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 19 Nov 2020 17:02:21 +0100 Subject: powerpc/ptrace: Simplify gpr_get()/tm_cgpr_get() gpr_get() does membuf_write() twice to override pt_regs->msr in between. We can call membuf_write() once and change ->msr in the kernel buffer, this simplifies the code and the next fix. The patch adds a new simple helper, membuf_at(offs), it returns the new membuf which can be safely used after membuf_write(). Signed-off-by: Oleg Nesterov [mpe: Fixup some minor whitespace issues noticed by Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201119160221.GA5188@redhat.com --- include/linux/regset.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index c3403f328257..a00765f0e8cf 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -46,6 +46,18 @@ static inline int membuf_write(struct membuf *s, const void *v, size_t size) return s->left; } +static inline struct membuf membuf_at(const struct membuf *s, size_t offs) +{ + struct membuf n = *s; + + if (offs > n.left) + offs = n.left; + n.p += offs; + n.left -= offs; + + return n; +} + /* current s->p must be aligned for v; v must be a scalar */ #define membuf_store(s, v) \ ({ \ -- cgit v1.2.3 From 11e5e568ceed7c8c570313a14fa96c72f21dad31 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 24 Nov 2020 17:48:04 -0800 Subject: usb: typec: tcpm: Stay in SNK_TRY_WAIT_DEBOUNCE_CHECK_VBUS till Rp is seen TD.4.7.3. Try SNK DRP Connect Try.SRC DRP fails. The compliance tester mimics being a Try.SRC USB-C port. The failure is due to TCPM exiting SNK_TRY_WAIT_DEBOUNCE_CHECK_VBUS when VBUS is not present eventhough when SNK.Rp is seen. Exit to SRC_TRYWAIT from SNK_TRY_WAIT_DEBOUNCE_CHECK_VBUS only when SNK.Rp is not seen for PD_T_TRY_CC_DEBOUNCE. >From the spec: The port shall then transition to Attached.SNK when the SNK.Rp state is detected on exactly one of the CC1 or CC2 pins for at least tTryCCDebounce and VBUS is detected. Alternatively, the port shall transition to TryWait.SRC if SNK.Rp state is not detected for tTryCCDebounce. Reviewed-by: Guenter Roeck Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20201125014804.1596719-1-badhri@google.com Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 3a805e2ecbc9..63a66dd5d832 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -484,6 +484,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_CC_DEBOUNCE 200 /* 100 - 200 ms */ #define PD_T_PD_DEBOUNCE 20 /* 10 - 20 ms */ +#define PD_T_TRY_CC_DEBOUNCE 15 /* 10 - 20 ms */ #define PD_N_CAPS_COUNT (PD_T_NO_RESPONSE / PD_T_SEND_SOURCE_CAP) #define PD_N_HARD_RESET_COUNT 2 -- cgit v1.2.3 From 07e21d4d96493fd0a8220ab134855253a34a9c84 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Nov 2020 01:22:22 +0800 Subject: soundwire: SDCA: add helper macro to access controls The upcoming SDCA (SoundWire Device Class Audio) specification defines a hierarchical encoding to interface with Class-defined capabilities. The specification is not yet accessible to the general public but this information is released with explicit permission from the MIPI Board to avoid delays with SDCA support on Linux platforms. A block of 64 MBytes of register addresses are allocated to SDCA controls, starting at address 0x40000000. The 26 LSBs which identify individual controls are set based on the following variables: - Function Number. An SCDA device can be split in up to 8 independent Functions. Each of these Functions is described in the SDCA specification, e.g. Smart Amplifier, Smart Microphone, Simple Microphone, Jack codec, HID, etc. - Entity Number. Within each Function, an Entity is an identifiable block. Up to 127 Entities are connected in a pre-defined graph (similar to USB), with Entity0 reserved for Function-level configurations. In contrast to USB, the SDCA spec pre-defines Function Types, topologies, and allowed options, i.e. the degree of freedom is not unlimited to limit the possibility of errors in descriptors leading to software quirks. - Control Selector. Within each Entity, the SDCA specification defines 48 controls such as Mute, Gain, AGC, etc, and 16 implementation defined ones. Some Control Selectors might be used for low-level platform setup, and other exposed to applications and users. Note that the same Control Selector capability, e.g. Latency control, might be located at different offsets in different entities, the Control Selector mapping is Entity-specific. - Control Number. Some Control Selectors allow channel-specific values to be set, with up to 64 channels allowed. This is mostly used for volume control. - Current/Next values. Some Control Selectors are 'Dual-Ranked'. Software may either update the Current value directly for immediate effect. Alternatively, software may write into the 'Next' values and update the SoundWire 1.2 'Commit Groups' register to copy 'Next' values into 'Current' ones in a synchronized manner. This is different from bank switching which is typically used to change the bus configuration only. - MBQ. the Multi-Byte Quantity bit is used to provide atomic updates when accessing more that one byte, for example a 16-bit volume control would be updated consistently, the intermediate values mixing old MSB with new LSB are not applied. These 6 parameters are used to build a 32-bit address to access the desired Controls. Because of address range, paging is required, but the most often used parameter values are placed in the lower 16 bits of the address. This helps to keep the paging registers constant while updating Controls for a specific Device/Function. Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20201103172226.4278-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_registers.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index f420e8059779..e14dff9a9c7f 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -298,4 +298,36 @@ #define SDW_CASC_PORT_MASK_INTSTAT3 1 #define SDW_CASC_PORT_REG_OFFSET_INTSTAT3 2 +/* + * v1.2 device - SDCA address mapping + * + * Spec definition + * Bits Contents + * 31 0 (required by addressing range) + * 30:26 0b10000 (Control Prefix) + * 25 0 (Reserved) + * 24:22 Function Number [2:0] + * 21 Entity[6] + * 20:19 Control Selector[5:4] + * 18 0 (Reserved) + * 17:15 Control Number[5:3] + * 14 Next + * 13 MBQ + * 12:7 Entity[5:0] + * 6:3 Control Selector[3:0] + * 2:0 Control Number[2:0] + */ + +#define SDW_SDCA_CTL(fun, ent, ctl, ch) (BIT(30) | \ + (((fun) & 0x7) << 22) | \ + (((ent) & 0x40) << 15) | \ + (((ent) & 0x3f) << 7) | \ + (((ctl) & 0x30) << 15) | \ + (((ctl) & 0x0f) << 3) | \ + (((ch) & 0x38) << 12) | \ + ((ch) & 0x07)) + +#define SDW_SDCA_MBQ_CTL(reg) ((reg) | BIT(13)) +#define SDW_SDCA_NEXT_CTL(reg) ((reg) | BIT(14)) + #endif /* __SDW_REGISTERS_H */ -- cgit v1.2.3 From fb5103f9d6ce197b4d0b67b4e60e68470f5293d1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Nov 2020 01:22:23 +0800 Subject: regmap/SoundWire: sdw: add support for SoundWire 1.2 MBQ The SoundWire 1.1 specification only allowed for reads and writes of bytes. The SoundWire 1.2 specification adds a new capability to transfer "Multi-Byte Quantities" (MBQ) across the bus. The transfers still happens one-byte-at-a-time, but the update is atomic. For example when writing a 16-bit volume, the first byte transferred is only taken into account when the second byte is successfully transferred. The mechanism is symmetrical for read and writes: - On a read, the address of the last byte to be read is modified by setting the MBQ bit - On a write, the address of all but the last byte to be written are modified by setting the MBQ bit. The address for the last byte relies on the MBQ bit being cleared. The current definitions for MBQ-based controls in the SDCA draft standard are limited to 16 bits for volumes, so for now this is the only supported format. An update will be provided if and when support for 24-bit and 32-bit values is specified by the SDCA standard. One possible objection is that this code could have been handled with regmap-sdw.c. However this is a new spec addition not handled by every SoundWire 1.1 and non-SDCA device, so there's no reason to load code that will never be used. Also in practice it's extremely unlikely that CONFIG_REGMAP would not be selected with CONFIG_REGMAP_MBQ selected. However there's no functional dependency between the two modules so they can be selected separately. Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201103172226.4278-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e7834d98207f..a652d1474d6a 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -570,6 +570,10 @@ struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_sdw_mbq(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -619,6 +623,10 @@ struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -817,6 +825,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__regmap_init_sdw, #config, \ sdw, config) +/** + * regmap_init_sdw_mbq() - Initialise register map + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_sdw_mbq(sdw, config) \ + __regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config, \ + sdw, config) + /** * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave * to AVMM Bus Bridge @@ -989,6 +1010,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_sdw, #config, \ sdw, config) +/** + * devm_regmap_init_sdw_mbq() - Initialise managed register map + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sdw_mbq(sdw, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sdw_mbq, #config, \ + sdw, config) + /** * devm_regmap_init_slimbus() - Initialise managed register map * -- cgit v1.2.3 From 4df910620bebb5cfe234af16ac8f6474b60215fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 25 Nov 2020 13:22:21 +0800 Subject: mm: memcg: relayout structure mem_cgroup to avoid cache interference 0day reported one -22.7% regression for will-it-scale page_fault2 case [1] on a 4 sockets 144 CPU platform, and bisected to it to be caused by Waiman's optimization (commit bd0b230fe1) of saving one 'struct page_counter' space for 'struct mem_cgroup'. Initially we thought it was due to the cache alignment change introduced by the patch, but further debug shows that it is due to some hot data members ('vmstats_local', 'vmstats_percpu', 'vmstats') sit in 2 adjacent cacheline (2N and 2N+1 cacheline), and when adjacent cache line prefetch is enabled, it triggers an "extended level" of cache false sharing for 2 adjacent cache lines. So exchange the 2 member blocks, while keeping mostly the original cache alignment, which can restore and even enhance the performance, and save 64 bytes of space for 'struct mem_cgroup' (from 2880 to 2816, with 0day's default RHEL-8.3 kernel config) [1]. https://lore.kernel.org/lkml/20201102091543.GM31092@shao2-debian/ Fixes: bd0b230fe145 ("mm/memcg: unify swap and memsw page counters") Reported-by: kernel test robot Signed-off-by: Feng Tang Acked-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a80c59af2c60..922a7f600465 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -282,20 +282,6 @@ struct mem_cgroup { MEMCG_PADDING(_pad1_); - /* - * set > 0 if pages under this cgroup are moving to other cgroup. - */ - atomic_t moving_account; - struct task_struct *move_lock_task; - - /* Legacy local VM stats and events */ - struct memcg_vmstats_percpu __percpu *vmstats_local; - - /* Subtree VM stats and events (batched updates) */ - struct memcg_vmstats_percpu __percpu *vmstats_percpu; - - MEMCG_PADDING(_pad2_); - atomic_long_t vmstats[MEMCG_NR_STAT]; atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; @@ -317,6 +303,20 @@ struct mem_cgroup { struct list_head objcg_list; /* list of inherited objcgs */ #endif + MEMCG_PADDING(_pad2_); + + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + struct task_struct *move_lock_task; + + /* Legacy local VM stats and events */ + struct memcg_vmstats_percpu __percpu *vmstats_local; + + /* Subtree VM stats and events (batched updates) */ + struct memcg_vmstats_percpu __percpu *vmstats_percpu; + #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; struct wb_domain cgwb_domain; -- cgit v1.2.3 From 2a2970891647fee7e7ea767425f895140faffaa8 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Fri, 20 Nov 2020 15:03:24 -0800 Subject: net/mlx5: Add sample offload hardware bits and structures Hardware introduces flow sampler object for packet sampling. Add the offload hardware bits and structures. Signed-off-by: Chris Mi Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 651591a2965d..65ea35af0527 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10657,11 +10657,13 @@ struct mlx5_ifc_affiliated_event_header_bits { enum { MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc), MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT(0x13), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT(0x20), }; enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, + MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, }; enum { @@ -10736,6 +10738,33 @@ struct mlx5_ifc_create_encryption_key_in_bits { struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; }; +struct mlx5_ifc_sampler_obj_bits { + u8 modify_field_select[0x40]; + + u8 table_type[0x8]; + u8 level[0x8]; + u8 reserved_at_50[0xf]; + u8 ignore_flow_level[0x1]; + + u8 sample_ratio[0x20]; + + u8 reserved_at_80[0x8]; + u8 sample_table_id[0x18]; + + u8 reserved_at_a0[0x8]; + u8 default_table_id[0x18]; + + u8 sw_steering_icm_address_rx[0x40]; + u8 sw_steering_icm_address_tx[0x40]; + + u8 reserved_at_140[0xa0]; +}; + +struct mlx5_ifc_create_sampler_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_sampler_obj_bits sampler_object; +}; + enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128 = 0x0, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256 = 0x1, -- cgit v1.2.3 From 38730630880c6f47ad73dd90524ff52443b8bc48 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Fri, 20 Nov 2020 15:03:25 -0800 Subject: net/mlx5: Add sampler destination type The flow sampler object is a new destination type. Add a new member for the flow destination. Signed-off-by: Chris Mi Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 846d94ad04bc..35d2cc1646d3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -132,6 +132,7 @@ struct mlx5_flow_destination { struct mlx5_pkt_reformat *pkt_reformat; u8 flags; } vport; + u32 sampler_id; }; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 65ea35af0527..2f2add4bd5e1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1616,6 +1616,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, -- cgit v1.2.3 From 7da3ad6c26f41f403fe6823c3de242551db09c37 Mon Sep 17 00:00:00 2001 From: Muhammad Sammar Date: Fri, 20 Nov 2020 15:03:27 -0800 Subject: net/mlx5: Add misc4 to mlx5_ifc_fte_match_param_bits Add misc4 match params to enable matching on prog_sample_fields. Signed-off-by: Muhammad Sammar Reviewed-by: Alex Vesker Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index cf824366a7d1..e9639c4cf2ed 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1076,6 +1076,7 @@ enum { MLX5_MATCH_INNER_HEADERS = 1 << 2, MLX5_MATCH_MISC_PARAMETERS_2 = 1 << 3, MLX5_MATCH_MISC_PARAMETERS_3 = 1 << 4, + MLX5_MATCH_MISC_PARAMETERS_4 = 1 << 5, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2f2add4bd5e1..11c24fafd7f2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -623,6 +623,26 @@ struct mlx5_ifc_fte_match_set_misc3_bits { u8 reserved_at_140[0xc0]; }; +struct mlx5_ifc_fte_match_set_misc4_bits { + u8 prog_sample_field_value_0[0x20]; + + u8 prog_sample_field_id_0[0x20]; + + u8 prog_sample_field_value_1[0x20]; + + u8 prog_sample_field_id_1[0x20]; + + u8 prog_sample_field_value_2[0x20]; + + u8 prog_sample_field_id_2[0x20]; + + u8 prog_sample_field_value_3[0x20]; + + u8 prog_sample_field_id_3[0x20]; + + u8 reserved_at_100[0x100]; +}; + struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; @@ -1669,7 +1689,9 @@ struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_misc3_bits misc_parameters_3; - u8 reserved_at_a00[0x600]; + struct mlx5_ifc_fte_match_set_misc4_bits misc_parameters_4; + + u8 reserved_at_c00[0x400]; }; enum { @@ -5462,6 +5484,7 @@ enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4, + MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_4 = 0x5, }; struct mlx5_ifc_query_flow_group_out_bits { -- cgit v1.2.3 From 59d2ae1db89f5a491d48f798ffccef36cc69ca65 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 20 Nov 2020 15:03:28 -0800 Subject: net/mlx5: Add ts_cqe_to_dest_cqn related bits Add a bit in HCA capabilities layout to indicate if ts_cqe_to_dest_cqn is supported. In addition, add ts_cqe_to_dest_cqn field to SQ context, for driver to set the actual CQN. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 11c24fafd7f2..632b9a61fda5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1261,7 +1261,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ece_support[0x1]; u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; - u8 reserved_at_b0[0x10]; + u8 reserved_at_b0[0x2]; + u8 ts_cqe_to_dest_cqn[0x1]; + u8 reserved_at_b3[0xd]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -3312,8 +3314,12 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_80[0x10]; u8 hairpin_peer_vhca[0x10]; - u8 reserved_at_a0[0x50]; + u8 reserved_at_a0[0x20]; + u8 reserved_at_c0[0x8]; + u8 ts_cqe_to_dest_cqn[0x18]; + + u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; u8 reserved_at_110[0x10]; -- cgit v1.2.3 From e5dfe6b57e8eada1c238dd2c7020345b0d8f6454 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:29 -0800 Subject: net/mlx5: Avoid exposing driver internal command helpers mlx5 command init and cleanup routines are internal to mlx5_core driver. Hence, avoid exporting them and move their definition to mlx5_core driver's internal file mlx5_core.h Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index add85094f9a5..5e84b1d53650 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -888,10 +888,6 @@ enum { CMD_ALLOWED_OPCODE_ALL, }; -int mlx5_cmd_init(struct mlx5_core_dev *dev); -void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); -void mlx5_cmd_set_state(struct mlx5_core_dev *dev, - enum mlx5_cmdif_state cmdif_state); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); -- cgit v1.2.3 From 349125ba232ea53d71a57c65c81f109c323cc369 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:31 -0800 Subject: net/mlx5: Update the hardware interface definition for vhca state Update the hardware interface definitions to query and modify vhca state, related EQE and event code. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 7 +++++++ include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e9639c4cf2ed..f1de49d64a98 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -346,6 +346,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe, + MLX5_EVENT_TYPE_VHCA_STATE_CHANGE = 0xf, MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_DCT_KEY_VIOLATION = 0x1d, @@ -717,6 +718,11 @@ struct mlx5_eqe_sync_fw_update { u8 sync_rst_state; }; +struct mlx5_eqe_vhca_state { + __be16 ec_function; + __be16 function_id; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -736,6 +742,7 @@ union ev_data { struct mlx5_eqe_temp_warning temp_warning; struct mlx5_eqe_xrq_err xrq_err; struct mlx5_eqe_sync_fw_update sync_fw_update; + struct mlx5_eqe_vhca_state vhca_state; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 632b9a61fda5..3ace1976514c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -299,6 +299,8 @@ enum { MLX5_CMD_OP_CREATE_UMEM = 0xa08, MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, MLX5_CMD_OP_SYNC_STEERING = 0xb00, + MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, + MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_MAX }; @@ -1244,7 +1246,15 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x30]; + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x3]; + u8 event_on_vhca_state_teardown_request[0x1]; + u8 event_on_vhca_state_in_use[0x1]; + u8 event_on_vhca_state_active[0x1]; + u8 event_on_vhca_state_allocated[0x1]; + u8 event_on_vhca_state_invalid[0x1]; + u8 reserved_at_28[0x8]; u8 vhca_id[0x10]; u8 reserved_at_40[0x40]; @@ -1534,7 +1544,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 disable_local_lb_uc[0x1]; u8 disable_local_lb_mc[0x1]; u8 log_min_hairpin_wq_data_sz[0x5]; - u8 reserved_at_3e8[0x3]; + u8 reserved_at_3e8[0x2]; + u8 vhca_state[0x1]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; @@ -1602,7 +1613,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_num_of_monitor_counters[0x10]; u8 num_ppcnt_monitor_counters[0x10]; - u8 reserved_at_640[0x10]; + u8 max_num_sf[0x10]; u8 num_q_monitor_counters[0x10]; u8 reserved_at_660[0x20]; -- cgit v1.2.3 From 21adf05d4584c99a07a604224b9cfeddcc6bc47c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Fri, 20 Nov 2020 15:03:32 -0800 Subject: net/mlx5: Expose IP-in-IP TX and RX capability bits Expose FW indication that it supports stateless offloads for IP over IP tunneled packets per direction. In some HW like ConnectX-4 IP-in-IP support is not symmetric, it supports steering on the inner header but it doesn't TX-Checksum and TSO. Add IP-in-IP capability per direction to cover this case as well. Note: only if both indications are turned on, the global tunnel_stateless_ip_over_ip is on too. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3ace1976514c..96888f9f822d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -913,7 +913,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_stateless_ipv4_over_vxlan[0x1]; u8 tunnel_stateless_ip_over_ip[0x1]; u8 insert_trailer[0x1]; - u8 reserved_at_2b[0x5]; + u8 reserved_at_2b[0x1]; + u8 tunnel_stateless_ip_over_ip_rx[0x1]; + u8 tunnel_stateless_ip_over_ip_tx[0x1]; + u8 reserved_at_2e[0x2]; u8 max_vxlan_udp_ports[0x8]; u8 reserved_at_38[0x6]; u8 max_geneve_opt_len[0x1]; -- cgit v1.2.3 From 959af5569f57d189b5c4fccae45f16d5ff01aa39 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Fri, 20 Nov 2020 15:03:33 -0800 Subject: net/mlx5: Expose other function ifc bits Expose other function ifc bits to enable setting HCA caps on behalf of other function. In addition, expose vhca_resource_manager bit to control whether the other function functionality is supported by firmware. Signed-off-by: Yishai Hadas Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96888f9f822d..3e337386faa8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1249,7 +1249,8 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x20]; + u8 reserved_at_0[0x1f]; + u8 vhca_resource_manager[0x1]; u8 reserved_at_20[0x3]; u8 event_on_vhca_state_teardown_request[0x1]; @@ -4247,7 +4248,11 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; union mlx5_ifc_hca_cap_union_bits capability; }; -- cgit v1.2.3 From 3b1e58aa832ed537289be6a51a2015309688a90c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:36 -0800 Subject: net/mlx5: Make API mlx5_core_is_ecpf accept const pointer Subsequent patch implements helper API which has mlx5_core_dev as const pointer, make its caller API too const *. Signed-off-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5e84b1d53650..d6ef3068d7d3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1133,7 +1133,7 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_VF; } -static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) +static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; } -- cgit v1.2.3 From 8a90f2fc67826a3876df1cb72e42d4a9a135f4fa Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:37 -0800 Subject: net/mlx5: Rename peer_pf to host_pf To match the hardware spec, rename peer_pf to host_pf. Signed-off-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d6ef3068d7d3..8e9bcb3bfd77 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -547,7 +547,7 @@ struct mlx5_priv { atomic_t reg_pages; struct list_head free_list; int vfs_pages; - int peer_pf_pages; + int host_pf_pages; struct mlx5_core_health health; -- cgit v1.2.3 From 617b860c1875842d9cc3338d7dabd2b3538038f1 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:39 -0800 Subject: net/mlx5: Treat host PF vport as other (non eswitch manager) vport When eswitch manager is running on ECPF, host PF should be treated as non eswitch manager port, similar to other VF vports. Fail to do so, results in firmware treating PF's vport as ECPF vport for eswitch ACL tables. Non zero check to figure out if a given vport is other vport or not is not sufficient becase PF vport number = 0 on ECPF. Hence, create esw acl tables with an attribute of other vport. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 35d2cc1646d3..1f51f4c3b1af 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -50,6 +50,7 @@ enum { MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), + MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), }; #define LEFTOVERS_RULE_NUM 2 @@ -174,9 +175,7 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - u32 level, u16 vport); + struct mlx5_flow_table_attr *ft_attr, u16 vport); struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( struct mlx5_flow_namespace *ns, int prio, u32 level); -- cgit v1.2.3 From 8d8d53cf8fd028310b1189165b939cde124895d7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Oct 2020 12:52:40 +1100 Subject: dma-mapping: Allow mixing bypass and mapped DMA operation At the moment we allow bypassing DMA ops only when we can do this for the entire RAM. However there are configs with mixed type memory where we could still allow bypassing IOMMU in most cases; POWERPC with persistent memory is one example. This adds an arch hook to determine where bypass can still work and we invoke direct DMA API. The following patch checks the bus limit on POWERPC to allow or disallow direct mapping. This adds a ARCH_HAS_DMA_MAP_DIRECT config option to make the arch_xxxx hooks no-op by default. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index a5f89fc4d6df..38c8a4558e08 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -314,6 +314,20 @@ static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); +#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT +bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +#else +#define arch_dma_map_page_direct(d, a) (false) +#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_sg_direct(d, s, n) (false) +#define arch_dma_unmap_sg_direct(d, s, n) (false) +#endif + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); -- cgit v1.2.3 From c7a5899eb26e2a4d516d53f65b6dd67be2228041 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 17 Nov 2020 17:47:23 +0100 Subject: xfrm: redact SA secret with lockdown confidentiality redact XFRM SA secret in the netlink response to xfrm_get_sa() or dumpall sa. Enable lockdown, confidentiality mode, at boot or at run time. e.g. when enabled: cat /sys/kernel/security/lockdown none integrity [confidentiality] ip xfrm state src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x00000002 reqid 2 mode tunnel replay-window 0 aead rfc4106(gcm(aes)) 0x0000000000000000000000000000000000000000 96 note: the aead secret is redacted. Redacting secret is also a FIPS 140-2 requirement. v1->v2 - add size checks before memset calls v2->v3 - replace spaces with tabs for consistency v3->v4 - use kernel lockdown instead of a /proc setting v4->v5 - remove kconfig option Reviewed-by: Stephan Mueller Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index bc2725491560..1112a79a7dba 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -127,6 +127,7 @@ enum lockdown_reason { LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, + LOCKDOWN_XFRM_SECRET, LOCKDOWN_CONFIDENTIALITY_MAX, }; -- cgit v1.2.3 From 928296ea5da37838d7127de4b10f47cd97401b13 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 30 Oct 2020 12:36:11 +0100 Subject: soc: mediatek: pm_domains: Make bus protection generic Bus protection is not exclusively done by calling the infracfg misc driver. Make the calls for setting and clearing the bus protection generic so that we can use other blocks for it as well. Signed-off-by: Matthias Brugger Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201030113622.201188-6-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 233463d789c6..5bcaab767f6a 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -32,6 +32,11 @@ #define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ BIT(7) | BIT(8)) +#define INFRA_TOPAXI_PROTECTEN 0x0220 +#define INFRA_TOPAXI_PROTECTSTA1 0x0228 +#define INFRA_TOPAXI_PROTECTEN_SET 0x0260 +#define INFRA_TOPAXI_PROTECTEN_CLR 0x0264 + #define REG_INFRA_MISC 0xf00 #define F_DDR_4GB_SUPPORT_EN BIT(13) -- cgit v1.2.3 From eb9fa767fbe19d3db7d303e9fde7f3056221ffe1 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 30 Oct 2020 12:36:17 +0100 Subject: soc: mediatek: pm-domains: Add support for mt8183 Add the needed board data to support mt8183 SoC. Signed-off-by: Matthias Brugger Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201030113622.201188-12-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 5bcaab767f6a..9d01e32e19bc 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,52 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8183_TOP_AXI_PROT_EN_STA1 0x228 +#define MT8183_TOP_AXI_PROT_EN_STA1_1 0x258 +#define MT8183_TOP_AXI_PROT_EN_SET 0x2a0 +#define MT8183_TOP_AXI_PROT_EN_CLR 0x2a4 +#define MT8183_TOP_AXI_PROT_EN_1_SET 0x2a8 +#define MT8183_TOP_AXI_PROT_EN_1_CLR 0x2ac +#define MT8183_TOP_AXI_PROT_EN_MCU_SET 0x2c4 +#define MT8183_TOP_AXI_PROT_EN_MCU_CLR 0x2c8 +#define MT8183_TOP_AXI_PROT_EN_MCU_STA1 0x2e4 +#define MT8183_TOP_AXI_PROT_EN_MM_SET 0x2d4 +#define MT8183_TOP_AXI_PROT_EN_MM_CLR 0x2d8 +#define MT8183_TOP_AXI_PROT_EN_MM_STA1 0x2ec + +#define MT8183_TOP_AXI_PROT_EN_DISP (BIT(10) | BIT(11)) +#define MT8183_TOP_AXI_PROT_EN_CONN (BIT(13) | BIT(14)) +#define MT8183_TOP_AXI_PROT_EN_MFG (BIT(21) | BIT(22)) +#define MT8183_TOP_AXI_PROT_EN_CAM BIT(28) +#define MT8183_TOP_AXI_PROT_EN_VPU_TOP BIT(27) +#define MT8183_TOP_AXI_PROT_EN_1_DISP (BIT(16) | BIT(17)) +#define MT8183_TOP_AXI_PROT_EN_1_MFG GENMASK(21, 19) +#define MT8183_TOP_AXI_PROT_EN_MM_ISP (BIT(3) | BIT(8)) +#define MT8183_TOP_AXI_PROT_EN_MM_ISP_2ND BIT(10) +#define MT8183_TOP_AXI_PROT_EN_MM_CAM (BIT(4) | BIT(5) | \ + BIT(9) | BIT(13)) +#define MT8183_TOP_AXI_PROT_EN_MM_VPU_TOP (GENMASK(9, 6) | \ + BIT(12)) +#define MT8183_TOP_AXI_PROT_EN_MM_VPU_TOP_2ND (BIT(10) | BIT(11)) +#define MT8183_TOP_AXI_PROT_EN_MM_CAM_2ND BIT(11) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE0_2ND (BIT(0) | BIT(2) | \ + BIT(4)) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE1_2ND (BIT(1) | BIT(3) | \ + BIT(5)) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE0 BIT(6) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE1 BIT(7) + +#define MT8183_SMI_COMMON_CLAMP_EN 0x3c0 +#define MT8183_SMI_COMMON_CLAMP_EN_SET 0x3c4 +#define MT8183_SMI_COMMON_CLAMP_EN_CLR 0x3c8 + +#define MT8183_SMI_COMMON_SMI_CLAMP_DISP GENMASK(7, 0) +#define MT8183_SMI_COMMON_SMI_CLAMP_VENC BIT(1) +#define MT8183_SMI_COMMON_SMI_CLAMP_ISP BIT(2) +#define MT8183_SMI_COMMON_SMI_CLAMP_CAM (BIT(3) | BIT(4)) +#define MT8183_SMI_COMMON_SMI_CLAMP_VPU_TOP (BIT(5) | BIT(6)) +#define MT8183_SMI_COMMON_SMI_CLAMP_VDEC BIT(7) + #define MT8173_TOP_AXI_PROT_EN_MCI_M2 BIT(0) #define MT8173_TOP_AXI_PROT_EN_MM_M0 BIT(1) #define MT8173_TOP_AXI_PROT_EN_MM_M1 BIT(2) -- cgit v1.2.3 From a49d5e7a89d644a5c0ddc851be4bbf08614e6015 Mon Sep 17 00:00:00 2001 From: Weiyi Lu Date: Fri, 30 Oct 2020 12:36:22 +0100 Subject: soc: mediatek: pm-domains: Add support for mt8192 Add the needed board data to support mt8192 SoC. Signed-off-by: Weiyi Lu Signed-off-by: Enric Balletbo i Serra Tested-by: Weiyi Lu Link: https://lore.kernel.org/r/20201030113622.201188-17-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 9d01e32e19bc..e7842debc05d 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,62 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8192_TOP_AXI_PROT_EN_STA1 0x228 +#define MT8192_TOP_AXI_PROT_EN_1_STA1 0x258 +#define MT8192_TOP_AXI_PROT_EN_SET 0x2a0 +#define MT8192_TOP_AXI_PROT_EN_CLR 0x2a4 +#define MT8192_TOP_AXI_PROT_EN_1_SET 0x2a8 +#define MT8192_TOP_AXI_PROT_EN_1_CLR 0x2ac +#define MT8192_TOP_AXI_PROT_EN_MM_SET 0x2d4 +#define MT8192_TOP_AXI_PROT_EN_MM_CLR 0x2d8 +#define MT8192_TOP_AXI_PROT_EN_MM_STA1 0x2ec +#define MT8192_TOP_AXI_PROT_EN_2_SET 0x714 +#define MT8192_TOP_AXI_PROT_EN_2_CLR 0x718 +#define MT8192_TOP_AXI_PROT_EN_2_STA1 0x724 +#define MT8192_TOP_AXI_PROT_EN_VDNR_SET 0xb84 +#define MT8192_TOP_AXI_PROT_EN_VDNR_CLR 0xb88 +#define MT8192_TOP_AXI_PROT_EN_VDNR_STA1 0xb90 +#define MT8192_TOP_AXI_PROT_EN_MM_2_SET 0xdcc +#define MT8192_TOP_AXI_PROT_EN_MM_2_CLR 0xdd0 +#define MT8192_TOP_AXI_PROT_EN_MM_2_STA1 0xdd8 + +#define MT8192_TOP_AXI_PROT_EN_DISP (BIT(6) | BIT(23)) +#define MT8192_TOP_AXI_PROT_EN_CONN (BIT(13) | BIT(18)) +#define MT8192_TOP_AXI_PROT_EN_CONN_2ND BIT(14) +#define MT8192_TOP_AXI_PROT_EN_MFG1 GENMASK(22, 21) +#define MT8192_TOP_AXI_PROT_EN_1_CONN BIT(10) +#define MT8192_TOP_AXI_PROT_EN_1_MFG1 BIT(21) +#define MT8192_TOP_AXI_PROT_EN_1_CAM BIT(22) +#define MT8192_TOP_AXI_PROT_EN_2_CAM BIT(0) +#define MT8192_TOP_AXI_PROT_EN_2_ADSP BIT(3) +#define MT8192_TOP_AXI_PROT_EN_2_AUDIO BIT(4) +#define MT8192_TOP_AXI_PROT_EN_2_MFG1 GENMASK(6, 5) +#define MT8192_TOP_AXI_PROT_EN_2_MFG1_2ND BIT(7) +#define MT8192_TOP_AXI_PROT_EN_MM_CAM (BIT(0) | BIT(2)) +#define MT8192_TOP_AXI_PROT_EN_MM_DISP (BIT(0) | BIT(2) | \ + BIT(10) | BIT(12) | \ + BIT(14) | BIT(16) | \ + BIT(24) | BIT(26)) +#define MT8192_TOP_AXI_PROT_EN_MM_CAM_2ND (BIT(1) | BIT(3)) +#define MT8192_TOP_AXI_PROT_EN_MM_DISP_2ND (BIT(1) | BIT(3) | \ + BIT(15) | BIT(17) | \ + BIT(25) | BIT(27)) +#define MT8192_TOP_AXI_PROT_EN_MM_ISP2 BIT(14) +#define MT8192_TOP_AXI_PROT_EN_MM_ISP2_2ND BIT(15) +#define MT8192_TOP_AXI_PROT_EN_MM_IPE BIT(16) +#define MT8192_TOP_AXI_PROT_EN_MM_IPE_2ND BIT(17) +#define MT8192_TOP_AXI_PROT_EN_MM_VDEC BIT(24) +#define MT8192_TOP_AXI_PROT_EN_MM_VDEC_2ND BIT(25) +#define MT8192_TOP_AXI_PROT_EN_MM_VENC BIT(26) +#define MT8192_TOP_AXI_PROT_EN_MM_VENC_2ND BIT(27) +#define MT8192_TOP_AXI_PROT_EN_MM_2_ISP BIT(8) +#define MT8192_TOP_AXI_PROT_EN_MM_2_DISP (BIT(8) | BIT(12)) +#define MT8192_TOP_AXI_PROT_EN_MM_2_ISP_2ND BIT(9) +#define MT8192_TOP_AXI_PROT_EN_MM_2_DISP_2ND (BIT(9) | BIT(13)) +#define MT8192_TOP_AXI_PROT_EN_MM_2_MDP BIT(12) +#define MT8192_TOP_AXI_PROT_EN_MM_2_MDP_2ND BIT(13) +#define MT8192_TOP_AXI_PROT_EN_VDNR_CAM BIT(21) + #define MT8183_TOP_AXI_PROT_EN_STA1 0x228 #define MT8183_TOP_AXI_PROT_EN_STA1_1 0x258 #define MT8183_TOP_AXI_PROT_EN_SET 0x2a0 -- cgit v1.2.3 From 431ec7bd4d52caa2fc20fbe87744f522e3d1efad Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Fri, 27 Nov 2020 13:52:01 +0100 Subject: mfd: si476x-core.h: Fix "regulator" spelling in comment "regualtor" -> "regulator" Signed-off-by: Michael Klein Signed-off-by: Lee Jones --- include/linux/mfd/si476x-core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h index 4708c2b8512a..dd95c37ca134 100644 --- a/include/linux/mfd/si476x-core.h +++ b/include/linux/mfd/si476x-core.h @@ -57,7 +57,7 @@ enum si476x_mfd_cells { * @SI476X_POWER_DOWN: In this state all regulators are turned off * and the reset line is pulled low. The device is completely * inactive. - * @SI476X_POWER_UP_FULL: In this state all the power regualtors are + * @SI476X_POWER_UP_FULL: In this state all the power regulators are * turned on, reset line pulled high, IRQ line is enabled(polling is * active for polling use scenario) and device is turned on with * POWER_UP command. The device is ready to be used. -- cgit v1.2.3 From 6a0eaf5123e0e1223252b88cd5775f74105e27bd Mon Sep 17 00:00:00 2001 From: Dean Camera Date: Thu, 26 Nov 2020 09:39:34 +1100 Subject: HID: Increase HID maximum report size to 16KB Currently the maximum HID report size which can be buffered by the kernel is 8KB. This is sufficient for the vast majority of HID devices on the market, as most HID reports are fairly small. However, some unusual devices such as the Elgate Stream Deck exist which use a report size slightly over 8KB for the image data that is sent to the device. Reports these large cannot be buffered by the regular HID subsystem currently, thus the only way to use such device is to bypass the HID subsystem entirely. This increases the maximum HID report size to 16KB, which should cover all sanely designed HID devices. Signed-off-by: Dean Camera Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index b079146850e6..c39d71eb1fd0 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -494,7 +494,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ +#define HID_MAX_BUFFER_SIZE 16384 /* 16kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 -- cgit v1.2.3 From fd3bb8f54a88107570334c156efb0c724a261003 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 27 Nov 2020 10:28:34 +0000 Subject: nvmem: core: Add support for keepout regions Introduce support into the nvmem core for arrays of register ranges that should not result in actual device access. For these regions a constant byte (repeated) is returned instead on read, and writes are quietly ignored and returned as successful. This is useful for instance if certain efuse regions are protected from access by Linux because they contain secret info to another part of the system (like an integrated modem). Signed-off-by: Evan Green Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20201127102837.19366-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 06409a6c40bc..e162b757b6d5 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -30,6 +30,19 @@ enum nvmem_type { #define NVMEM_DEVID_NONE (-1) #define NVMEM_DEVID_AUTO (-2) +/** + * struct nvmem_keepout - NVMEM register keepout range. + * + * @start: The first byte offset to avoid. + * @end: One beyond the last byte offset to avoid. + * @value: The byte to fill reads with for this region. + */ +struct nvmem_keepout { + unsigned int start; + unsigned int end; + unsigned char value; +}; + /** * struct nvmem_config - NVMEM device configuration * @@ -39,6 +52,8 @@ enum nvmem_type { * @owner: Pointer to exporter module. Used for refcounting. * @cells: Optional array of pre-defined NVMEM cells. * @ncells: Number of elements in cells. + * @keepout: Optional array of keepout ranges (sorted ascending by start). + * @nkeepout: Number of elements in the keepout array. * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. @@ -66,6 +81,8 @@ struct nvmem_config { struct gpio_desc *wp_gpio; const struct nvmem_cell_info *cells; int ncells; + const struct nvmem_keepout *keepout; + unsigned int nkeepout; enum nvmem_type type; bool read_only; bool root_only; -- cgit v1.2.3 From 24c8a743336a1fdf42c0c768b4435633069c6a39 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 30 Sep 2020 20:48:02 +0200 Subject: pcmcia: at91_cf: move definitions locally struct at91_cf_data is only used in the driver since all the platforms moved to device tree, move its definition locally. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200930184804.3127757-1-alexandre.belloni@bootlin.com --- include/linux/platform_data/atmel.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 99e6069c5fd8..73f63be509c4 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -6,18 +6,6 @@ #ifndef __ATMEL_H__ #define __ATMEL_H__ - /* Compact Flash */ -struct at91_cf_data { - int irq_pin; /* I/O IRQ */ - int det_pin; /* Card detect */ - int vcc_pin; /* power switching */ - int rst_pin; /* card reset */ - u8 chipselect; /* EBI Chip Select number */ - u8 flags; -#define AT91_CF_TRUE_IDE 0x01 -#define AT91_IDE_SWAP_A0_A2 0x02 -}; - /* FIXME: this needs a better location, but gets stuff building again */ #ifdef CONFIG_ATMEL_PM extern int at91_suspend_entering_slow_clock(void); -- cgit v1.2.3 From a69dcdfc2dd21f86cb1f79f98fc94c52f96cff64 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Mon, 2 Nov 2020 08:04:38 +0800 Subject: soc / drm: mediatek: cmdq: Remove timeout handler in helper function For each client driver, its timeout handler need to dump hardware register or its state machine information, and their way to detect timeout are also different, so remove timeout handler in helper function and let client driver implement its own timeout handler. Signed-off-by: Chun-Kuang Hu Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20201102000438.29225-1-chunkuang.hu@kernel.org Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-cmdq.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 960704d75994..8e9996610978 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -11,7 +11,6 @@ #include #include -#define CMDQ_NO_TIMEOUT 0xffffffffu #define CMDQ_ADDR_HIGH(addr) ((u32)(((addr) >> 16) & GENMASK(31, 0))) #define CMDQ_ADDR_LOW(addr) ((u16)(addr) | BIT(1)) @@ -24,12 +23,8 @@ struct cmdq_client_reg { }; struct cmdq_client { - spinlock_t lock; - u32 pkt_cnt; struct mbox_client client; struct mbox_chan *chan; - struct timer_list timer; - u32 timeout_ms; /* in unit of microsecond */ }; /** @@ -51,13 +46,10 @@ int cmdq_dev_get_client_reg(struct device *dev, * cmdq_mbox_create() - create CMDQ mailbox client and channel * @dev: device of CMDQ mailbox client * @index: index of CMDQ mailbox channel - * @timeout: timeout of a pkt execution by GCE, in unit of microsecond, set - * CMDQ_NO_TIMEOUT if a timer is not used. * * Return: CMDQ mailbox client pointer */ -struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, - u32 timeout); +struct cmdq_client *cmdq_mbox_create(struct device *dev, int index); /** * cmdq_mbox_destroy() - destroy CMDQ mailbox client and channel -- cgit v1.2.3 From 51c0e618b219c025ddaaf14baea8942cb7e2105b Mon Sep 17 00:00:00 2001 From: Yongqiang Niu Date: Tue, 6 Oct 2020 21:33:17 +0200 Subject: soc / drm: mediatek: Move DDP component defines into mtk-mmsys.h MMSYS is the driver which controls the routing of these DDP components, so the definition of the mtk_ddp_comp_id enum should be placed in mtk-mmsys.h Signed-off-by: Yongqiang Niu Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chun-Kuang Hu Link: https://lore.kernel.org/r/20201006193320.405529-2-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 7bab5d9a3d31..2228bf6133da 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -9,6 +9,39 @@ enum mtk_ddp_comp_id; struct device; +enum mtk_ddp_comp_id { + DDP_COMPONENT_AAL0, + DDP_COMPONENT_AAL1, + DDP_COMPONENT_BLS, + DDP_COMPONENT_CCORR, + DDP_COMPONENT_COLOR0, + DDP_COMPONENT_COLOR1, + DDP_COMPONENT_DITHER, + DDP_COMPONENT_DPI0, + DDP_COMPONENT_DPI1, + DDP_COMPONENT_DSI0, + DDP_COMPONENT_DSI1, + DDP_COMPONENT_DSI2, + DDP_COMPONENT_DSI3, + DDP_COMPONENT_GAMMA, + DDP_COMPONENT_OD0, + DDP_COMPONENT_OD1, + DDP_COMPONENT_OVL0, + DDP_COMPONENT_OVL_2L0, + DDP_COMPONENT_OVL_2L1, + DDP_COMPONENT_OVL1, + DDP_COMPONENT_PWM0, + DDP_COMPONENT_PWM1, + DDP_COMPONENT_PWM2, + DDP_COMPONENT_RDMA0, + DDP_COMPONENT_RDMA1, + DDP_COMPONENT_RDMA2, + DDP_COMPONENT_UFOE, + DDP_COMPONENT_WDMA0, + DDP_COMPONENT_WDMA1, + DDP_COMPONENT_ID_MAX, +}; + void mtk_mmsys_ddp_connect(struct device *dev, enum mtk_ddp_comp_id cur, enum mtk_ddp_comp_id next); -- cgit v1.2.3 From 206e7383b34316cf56cdde96eab9d97e9a1dbd70 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 26 Nov 2020 11:20:35 +0100 Subject: bus: mhi: core: Indexed MHI controller name Today the MHI controller name is simply cloned from the underlying bus device (its parent), that gives the following device structure for e.g. a MHI/PCI controller: devices/pci0000:00/0000:00:01.2/0000:02:00.0/0000:02:00.0 devices/pci0000:00/0000:00:01.2/0000:02:00.0/0000:02:00.0/0000:02:00.0_IPCR ... That's quite misleading/confusing and can cause device registering issues because of duplicate dev name (e.g. if a PCI device register two different MHI instances). This patch changes MHI core to create indexed mhi controller names (mhi0, mhi1...) in the same way as other busses (i2c0, usb0...). The previous example becomes: devices/pci0000:00/0000:00:01.2/0000:02:00.0/mhi0 devices/pci0000:00/0000:00:01.2/0000:02:00.0/mhi0/mhi0_IPCR ... v2: move index field at the end of mhi_controller struct (before bool) to avoid breaking well packed alignment. Signed-off-by: Loic Poulain Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d31efcf02ae7..aa9757e71f1f 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -348,6 +348,7 @@ struct mhi_controller_config { * @read_reg: Read a MHI register via the physical link (required) * @write_reg: Write a MHI register via the physical link (required) * @buffer_len: Bounce buffer length + * @index: Index of the MHI controller instance * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) * @pre_init: MHI host needs to do pre-initialization before power up @@ -438,6 +439,7 @@ struct mhi_controller { u32 val); size_t buffer_len; + int index; bool bounce_buf; bool fbc_download; bool pre_init; -- cgit v1.2.3 From 7776bcd241e08e13ef009926c6dea84dc3b2f8ff Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 3 Nov 2020 00:48:44 +0100 Subject: power: supply: s3c-adc-battery: Convert to GPIO descriptors This converts the S3C ADC battery to use GPIO descriptors instead of a global GPIO number for the charging completed GPIO. Using the pattern from the GPIO charger we name this GPIO line "charge-status" in the board file. Cc: linux-samsung-soc@vger.kernel.org Cc: Sergiy Kibrik Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/s3c_adc_battery.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/s3c_adc_battery.h b/include/linux/s3c_adc_battery.h index 833871dcf6fd..57f982c375f8 100644 --- a/include/linux/s3c_adc_battery.h +++ b/include/linux/s3c_adc_battery.h @@ -14,9 +14,6 @@ struct s3c_adc_bat_pdata { void (*enable_charger)(void); void (*disable_charger)(void); - int gpio_charge_finished; - int gpio_inverted; - const struct s3c_adc_bat_thresh *lut_noac; unsigned int lut_noac_cnt; const struct s3c_adc_bat_thresh *lut_acin; -- cgit v1.2.3 From b0327ffb133fb2148fc3bc2afb39af2871ab21cb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Oct 2020 13:24:24 +0100 Subject: power: supply: generic-adc-battery: Use GPIO descriptors This driver uses platform data to pass GPIO lines using the deprecated global GPIO numbers. There are no in-tree users of this platform data. Any out-of-tree or coming users of this driver can easily be migrated to use machine descriptor tables as described in Documentation/driver-api/gpio/board.rst section "platform data". Cc: Anish Kumar Cc: H. Nikolaus Schaller Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power/generic-adc-battery.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/generic-adc-battery.h b/include/linux/power/generic-adc-battery.h index 40f9c7628f7b..c68cbf34cd34 100644 --- a/include/linux/power/generic-adc-battery.h +++ b/include/linux/power/generic-adc-battery.h @@ -11,16 +11,12 @@ * @battery_info: recommended structure to specify static power supply * parameters * @cal_charge: calculate charge level. - * @gpio_charge_finished: gpio for the charger. - * @gpio_inverted: Should be 1 if the GPIO is active low otherwise 0 * @jitter_delay: delay required after the interrupt to check battery * status.Default set is 10ms. */ struct gab_platform_data { struct power_supply_info battery_info; int (*cal_charge)(long value); - int gpio_charge_finished; - bool gpio_inverted; int jitter_delay; }; -- cgit v1.2.3 From 51e7bf4534da678da27c0f51e7ff21804fae88ca Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:05 +0200 Subject: mtd: nand: ecc: Add an I/O request tweaking mechanism Currently, BCH and Hamming engine are sharing the same tweaking/restoring I/O mechanism: they need the I/O request to fully cover the main/OOB area. Let's make this code generic as sharing the code between two drivers is already a win. Maybe other ECC engine drivers will need it too. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 697ea2474a7c..36e4fe08d0ea 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,38 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +/** + * struct nand_ecc_req_tweak_ctx - Help for automatically tweaking requests + * @orig_req: Pointer to the original IO request + * @nand: Related NAND device, to have access to its memory organization + * @page_buffer_size: Real size of the page buffer to use (can be set by the + * user before the tweaking mechanism initialization) + * @oob_buffer_size: Real size of the OOB buffer to use (can be set by the + * user before the tweaking mechanism initialization) + * @spare_databuf: Data bounce buffer + * @spare_oobbuf: OOB bounce buffer + * @bounce_data: Flag indicating a data bounce buffer is used + * @bounce_oob: Flag indicating an OOB bounce buffer is used + */ +struct nand_ecc_req_tweak_ctx { + struct nand_page_io_req orig_req; + struct nand_device *nand; + unsigned int page_buffer_size; + unsigned int oob_buffer_size; + void *spare_databuf; + void *spare_oobbuf; + bool bounce_data; + bool bounce_oob; +}; + +int nand_ecc_init_req_tweaking(struct nand_ecc_req_tweak_ctx *ctx, + struct nand_device *nand); +void nand_ecc_cleanup_req_tweaking(struct nand_ecc_req_tweak_ctx *ctx); +void nand_ecc_tweak_req(struct nand_ecc_req_tweak_ctx *ctx, + struct nand_page_io_req *req); +void nand_ecc_restore_req(struct nand_ecc_req_tweak_ctx *ctx, + struct nand_page_io_req *req); + /** * struct nand_ecc - Information relative to the ECC * @defaults: Default values, depend on the underlying subsystem -- cgit v1.2.3 From cdbe8df5e28e452c232c0c16b205edfd390d28e5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:06 +0200 Subject: mtd: nand: ecc-bch: Move BCH code to the generic NAND layer BCH ECC code might be later re-used by the SPI NAND layer. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-3-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 66 +++++++++++++++++++++++++++++++++++++ include/linux/mtd/nand_bch.h | 66 ------------------------------------- 2 files changed, 66 insertions(+), 66 deletions(-) create mode 100644 include/linux/mtd/nand-ecc-sw-bch.h delete mode 100644 include/linux/mtd/nand_bch.h (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h new file mode 100644 index 000000000000..1e1ee3af82b1 --- /dev/null +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2011 Ivan Djelic + * + * This file is the header for the NAND BCH ECC implementation. + */ + +#ifndef __MTD_NAND_ECC_SW_BCH_H__ +#define __MTD_NAND_ECC_SW_BCH_H__ + +struct mtd_info; +struct nand_chip; +struct nand_bch_control; + +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) + +static inline int mtd_nand_has_bch(void) { return 1; } + +/* + * Calculate BCH ecc code + */ +int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, + u_char *ecc_code); + +/* + * Detect and correct bit errors + */ +int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, + u_char *read_ecc, u_char *calc_ecc); +/* + * Initialize BCH encoder/decoder + */ +struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); +/* + * Release BCH encoder/decoder resources + */ +void nand_bch_free(struct nand_bch_control *nbc); + +#else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ + +static inline int mtd_nand_has_bch(void) { return 0; } + +static inline int +nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, + u_char *ecc_code) +{ + return -1; +} + +static inline int +nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, + unsigned char *read_ecc, unsigned char *calc_ecc) +{ + return -ENOTSUPP; +} + +static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) +{ + return NULL; +} + +static inline void nand_bch_free(struct nand_bch_control *nbc) {} + +#endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ + +#endif /* __MTD_NAND_ECC_SW_BCH_H__ */ diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h deleted file mode 100644 index d5956cc48ba9..000000000000 --- a/include/linux/mtd/nand_bch.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2011 Ivan Djelic - * - * This file is the header for the NAND BCH ECC implementation. - */ - -#ifndef __MTD_NAND_BCH_H__ -#define __MTD_NAND_BCH_H__ - -struct mtd_info; -struct nand_chip; -struct nand_bch_control; - -#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) - -static inline int mtd_nand_has_bch(void) { return 1; } - -/* - * Calculate BCH ecc code - */ -int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct bit errors - */ -int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, - u_char *read_ecc, u_char *calc_ecc); -/* - * Initialize BCH encoder/decoder - */ -struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); -/* - * Release BCH encoder/decoder resources - */ -void nand_bch_free(struct nand_bch_control *nbc); - -#else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ - -static inline int mtd_nand_has_bch(void) { return 0; } - -static inline int -nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code) -{ - return -1; -} - -static inline int -nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, - unsigned char *read_ecc, unsigned char *calc_ecc) -{ - return -ENOTSUPP; -} - -static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) -{ - return NULL; -} - -static inline void nand_bch_free(struct nand_bch_control *nbc) {} - -#endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ - -#endif /* __MTD_NAND_BCH_H__ */ -- cgit v1.2.3 From 3c0fe36abebee55821badaa9d6cecd03799f7843 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:08 +0200 Subject: mtd: nand: ecc-bch: Stop exporting the private structure The NAND BCH control structure has nothing to do outside of this driver, all users of the nand_bch_init/free() functions just save it to chip->ecc.priv so do it in this driver directly and return a regular error code instead. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-5-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index 1e1ee3af82b1..b62b8bd4669f 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -10,7 +10,6 @@ struct mtd_info; struct nand_chip; -struct nand_bch_control; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) @@ -30,11 +29,11 @@ int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, /* * Initialize BCH encoder/decoder */ -struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); +int nand_bch_init(struct nand_chip *chip); /* * Release BCH encoder/decoder resources */ -void nand_bch_free(struct nand_bch_control *nbc); +void nand_bch_free(struct nand_chip *chip); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ @@ -54,12 +53,12 @@ nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, return -ENOTSUPP; } -static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) +static inline int nand_bch_init(struct nand_chip *chip) { - return NULL; + return -ENOTSUPP; } -static inline void nand_bch_free(struct nand_bch_control *nbc) {} +static inline void nand_bch_free(struct nand_chip *chip) {} #endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ -- cgit v1.2.3 From e3010bd3ef1eda13f08155fe43846a64d0990a86 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:09 +0200 Subject: mtd: nand: ecc-bch: Return only valid error codes When a function is not available, returning -ENOTSUPP makes much more sense than returning -1. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-6-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index b62b8bd4669f..d92d2abcfcef 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -43,7 +43,7 @@ static inline int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, u_char *ecc_code) { - return -1; + return -ENOTSUPP; } static inline int -- cgit v1.2.3 From 127aae6077562e3926ebad7c782123c2afe95846 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:10 +0200 Subject: mtd: nand: ecc-bch: Drop mtd_nand_has_bch() Like for any other compilation option, use the IS_ENABLED() macro instead of hardcoding it. By droping this helper we can get rid of the BCH header in nandsim.c. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-7-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index d92d2abcfcef..7b62996d9e01 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -13,8 +13,6 @@ struct nand_chip; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) -static inline int mtd_nand_has_bch(void) { return 1; } - /* * Calculate BCH ecc code */ @@ -37,8 +35,6 @@ void nand_bch_free(struct nand_chip *chip); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ -static inline int mtd_nand_has_bch(void) { return 0; } - static inline int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, u_char *ecc_code) -- cgit v1.2.3 From ea146d7fbf5081b5eb2777df5e30ed70ca68985b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:11 +0200 Subject: mtd: nand: ecc-bch: Update the prototypes to be more generic These functions must be usable by the main NAND core, so their names must be technology-agnostic as well as the parameters. Hence, we pass a generic nand_device instead of a raw nand_chip structure. As it seems that changing the raw NAND functions to always pass a generic NAND device is a lost of time, we prefer to create dedicated raw NAND wrappers that will be useful in the near future to do the translation. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-8-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 45 +++++++++++++------------------------ include/linux/mtd/rawnand.h | 5 +++++ 2 files changed, 21 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index 7b62996d9e01..f0caee3b03d0 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -8,53 +8,40 @@ #ifndef __MTD_NAND_ECC_SW_BCH_H__ #define __MTD_NAND_ECC_SW_BCH_H__ -struct mtd_info; -struct nand_chip; +#include #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) -/* - * Calculate BCH ecc code - */ -int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct bit errors - */ -int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, - u_char *read_ecc, u_char *calc_ecc); -/* - * Initialize BCH encoder/decoder - */ -int nand_bch_init(struct nand_chip *chip); -/* - * Release BCH encoder/decoder resources - */ -void nand_bch_free(struct nand_chip *chip); +int nand_ecc_sw_bch_calculate(struct nand_device *nand, + const unsigned char *buf, unsigned char *code); +int nand_ecc_sw_bch_correct(struct nand_device *nand, unsigned char *buf, + unsigned char *read_ecc, unsigned char *calc_ecc); +int nand_ecc_sw_bch_init(struct nand_device *nand); +void nand_ecc_sw_bch_cleanup(struct nand_device *nand); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ -static inline int -nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code) +static inline int nand_ecc_sw_bch_calculate(struct nand_device *nand, + const unsigned char *buf, + unsigned char *code) { return -ENOTSUPP; } -static inline int -nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, - unsigned char *read_ecc, unsigned char *calc_ecc) +static inline int nand_ecc_sw_bch_correct(struct nand_device *nand, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) { return -ENOTSUPP; } -static inline int nand_bch_init(struct nand_chip *chip) +static inline int nand_ecc_sw_bch_init(struct nand_device *nand) { return -ENOTSUPP; } -static inline void nand_bch_free(struct nand_chip *chip) {} +static inline void nand_ecc_sw_bch_cleanup(struct nand_device *nand) {} #endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index aac07940de09..23623beaad1d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1303,6 +1303,11 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } +int rawnand_sw_bch_init(struct nand_chip *chip); +int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, + unsigned char *read_ecc, unsigned char *calc_ecc); +void rawnand_sw_bch_cleanup(struct nand_chip *chip); + int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, void *extraoob, int extraooblen, -- cgit v1.2.3 From 80fe603160a4732a08f0f08f3e3312a3f3a79eee Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:12 +0200 Subject: mtd: nand: ecc-bch: Stop using raw NAND structures This code is meant to be reused by the SPI-NAND core. Now that the driver has been cleaned and reorganized, use a generic ECC engine object to store the driver's data instead of accessing members of the nand_chip structure. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-9-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index f0caee3b03d0..ce005528e55f 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -9,6 +9,31 @@ #define __MTD_NAND_ECC_SW_BCH_H__ #include +#include + +/** + * struct nand_ecc_sw_bch_conf - private software BCH ECC engine structure + * @reqooblen: Save the actual user OOB length requested before overwriting it + * @spare_oobbuf: Spare OOB buffer if none is provided + * @code_size: Number of bytes needed to store a code (one code per step) + * @nsteps: Number of steps + * @calc_buf: Buffer to use when calculating ECC bytes + * @code_buf: Buffer to use when reading (raw) ECC bytes from the chip + * @bch: BCH control structure + * @errloc: error location array + * @eccmask: XOR ecc mask, allows erased pages to be decoded as valid + */ +struct nand_ecc_sw_bch_conf { + unsigned int reqooblen; + void *spare_oobbuf; + unsigned int code_size; + unsigned int nsteps; + u8 *calc_buf; + u8 *code_buf; + struct bch_control *bch; + unsigned int *errloc; + unsigned char *eccmask; +}; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) -- cgit v1.2.3 From 57e3cebd022fbc035dcf190ac789fd2ffc747f5b Mon Sep 17 00:00:00 2001 From: Shenming Lu Date: Sat, 28 Nov 2020 22:18:57 +0800 Subject: KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit In order to reduce the impact of the VPT parsing happening on the GIC, we can split the vcpu reseidency in two phases: - programming GICR_VPENDBASER: this still happens in vcpu_load() - checking for the VPT parsing to be complete: this can happen on vcpu entry (in kvm_vgic_flush_hwstate()) This allows the GIC and the CPU to work in parallel, rewmoving some of the entry overhead. Suggested-by: Marc Zyngier Signed-off-by: Shenming Lu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201128141857.983-3-lushenming@huawei.com --- include/linux/irqchip/arm-gic-v4.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b60..943c3411ca10 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { irq_hw_number_t vpe_db_lpi; /* VPE resident */ bool resident; + /* VPT parse complete */ + bool ready; union { /* GICv4.0 implementations */ struct { @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -- cgit v1.2.3 From bb4c6910c8b41623104c2e64a30615682689a54d Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 26 Nov 2020 09:28:51 +0100 Subject: genirq/irqdomain: Add an irq_create_mapping_affinity() function There is currently no way to convey the affinity of an interrupt via irq_create_mapping(), which creates issues for devices that expect that affinity to be managed by the kernel. In order to sort this out, rename irq_create_mapping() to irq_create_mapping_affinity() with an additional affinity parameter that can be passed down to irq_domain_alloc_descs(). irq_create_mapping() is re-implemented as a wrapper around irq_create_mapping_affinity(). No functional change. Fixes: e75eafb9b039 ("genirq/msi: Switch to new irq spreading infrastructure") Signed-off-by: Laurent Vivier Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kurz Cc: Michael Ellerman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201126082852.1178497-2-lvivier@redhat.com --- include/linux/irqdomain.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 71535e87109f..ea5a337e0f8b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -384,11 +384,19 @@ extern void irq_domain_associate_many(struct irq_domain *domain, extern void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq); -extern unsigned int irq_create_mapping(struct irq_domain *host, - irq_hw_number_t hwirq); +extern unsigned int irq_create_mapping_affinity(struct irq_domain *host, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity); extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); extern void irq_dispose_mapping(unsigned int virq); +static inline unsigned int irq_create_mapping(struct irq_domain *host, + irq_hw_number_t hwirq) +{ + return irq_create_mapping_affinity(host, hwirq, NULL); +} + + /** * irq_linear_revmap() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt -- cgit v1.2.3 From 63e2fffa59a9dd91e443b08832656399fd80b7f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 15 Nov 2020 17:37:37 -0500 Subject: pNFS/flexfiles: Fix array overflow when flexfiles mirroring is enabled If the flexfiles mirroring is enabled, then the read code expects to be able to set pgio->pg_mirror_idx to point to the data server that is being used for this particular read. However it does not change the pg_mirror_count because we only need to send a single read. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index c32c15216da3..f0373a6cb5fb 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -55,6 +55,7 @@ struct nfs_page { unsigned short wb_nio; /* Number of I/O attempts */ }; +struct nfs_pgio_mirror; struct nfs_pageio_descriptor; struct nfs_pageio_ops { void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); @@ -64,6 +65,9 @@ struct nfs_pageio_ops { unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, struct nfs_page *); void (*pg_cleanup)(struct nfs_pageio_descriptor *); + struct nfs_pgio_mirror * + (*pg_get_mirror)(struct nfs_pageio_descriptor *, u32); + u32 (*pg_set_mirror)(struct nfs_pageio_descriptor *, u32); }; struct nfs_rw_ops { -- cgit v1.2.3 From 5a7e702670adc368caa1b64c2138956d8cba0d4f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:09 -0400 Subject: SUNRPC: Adjust synopsis of xdr_buf_subsegment() Clean up: This enables xdr_buf_subsegment()'s callers to pass in a const pointer to that buffer. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9548d075e06d..ec2a22ccdc2a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -183,7 +183,8 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) */ extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, + unsigned int base, unsigned int len); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -- cgit v1.2.3 From 03493bca084fdca48abc59b00e06ce733aa9eb7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 10 Jun 2020 10:36:42 -0400 Subject: SUNRPC: Rename svc_encode_read_payload() Clean up: "result payload" is a less confusing name for these payloads. "READ payload" reflects only the NFS usage. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 +++--- include/linux/sunrpc/svc_rdma.h | 4 ++-- include/linux/sunrpc/svc_xprt.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 386628b36bc7..c220b734fa69 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -519,9 +519,9 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); -int svc_encode_read_payload(struct svc_rqst *rqstp, - unsigned int offset, - unsigned int length); +int svc_encode_result_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, struct kvec *first, size_t total); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 9dc3a3b88391..2b870a3f391b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -207,8 +207,8 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status); extern int svc_rdma_sendto(struct svc_rqst *); -extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length); +extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); /* svc_rdma_transport.c */ extern struct svc_xprt_class svc_rdma_class; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index aca35ab5cff2..92455e0d5244 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -21,8 +21,8 @@ struct svc_xprt_ops { int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); - int (*xpo_read_payload)(struct svc_rqst *, unsigned int, - unsigned int); + int (*xpo_result_payload)(struct svc_rqst *, unsigned int, + unsigned int); void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); -- cgit v1.2.3 From f6ad77590a5d432589a5d8a211c4e8e50cd8bb63 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:10 -0400 Subject: svcrdma: Post RDMA Writes while XDR encoding replies The only RPC/RDMA ordering requirement between RDMA Writes and RDMA Sends is that the responder must post the Writes on the Send queue before posting the Send that conveys the RPC Reply for that Write payload. The Linux NFS server implementation now has a transport method that can post result Payload Writes earlier than svc_rdma_sendto: ->xpo_result_payload() This gets RDMA Writes going earlier so they are more likely to be complete at the remote end before the Send completes. Some care must be taken with pulled-up Replies. We don't want to push the Write chunk and then send the same payload data via Send. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2b870a3f391b..f5a3c852bb90 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -183,9 +183,7 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, struct xdr_buf *xdr, - unsigned int offset, - unsigned long length); + __be32 *wr_ch, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); -- cgit v1.2.3 From 78147ca8b4a9b6cf0e597ddd6bf17959e08376c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Jun 2020 10:15:41 -0400 Subject: svcrdma: Add a "parsed chunk list" data structure This simple data structure binds the location of each data payload inside of an RPC message to the chunk that will be used to push it to or pull it from the client. There are several benefits to this small additional overhead: * It enables support for more than one chunk in incoming Read and Write lists. * It translates the version-specific on-the-wire format into a generic in-memory structure, enabling support for multiple versions of the RPC/RDMA transport protocol. * It enables the server to re-organize a chunk list if it needs to adjust where Read chunk data lands in server memory without altering the contents of the XDR-encoded Receive buffer. Construction of these lists is done while sanity checking each incoming RPC/RDMA header. Subsequent patches will make use of the generated data structures. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 12 ++++ include/linux/sunrpc/svc_rdma_pcl.h | 128 ++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 include/linux/sunrpc/svc_rdma_pcl.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f5a3c852bb90..a89d4209fe2a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -47,6 +47,8 @@ #include #include #include +#include + #include #include @@ -142,8 +144,18 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + + struct svc_rdma_pcl rc_call_pcl; + + struct svc_rdma_pcl rc_read_pcl; + __be32 *rc_write_list; + struct svc_rdma_chunk *rc_cur_result_payload; + struct svc_rdma_pcl rc_write_pcl; + __be32 *rc_reply_chunk; + struct svc_rdma_pcl rc_reply_pcl; + unsigned int rc_read_payload_offset; unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h new file mode 100644 index 000000000000..7516ad0fae80 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma_pcl.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Oracle and/or its affiliates + */ + +#ifndef SVC_RDMA_PCL_H +#define SVC_RDMA_PCL_H + +#include + +struct svc_rdma_segment { + u32 rs_handle; + u32 rs_length; + u64 rs_offset; +}; + +struct svc_rdma_chunk { + struct list_head ch_list; + + u32 ch_position; + u32 ch_length; + u32 ch_payload_length; + + u32 ch_segcount; + struct svc_rdma_segment ch_segments[]; +}; + +struct svc_rdma_pcl { + unsigned int cl_count; + struct list_head cl_chunks; +}; + +/** + * pcl_init - Initialize a parsed chunk list + * @pcl: parsed chunk list to initialize + * + */ +static inline void pcl_init(struct svc_rdma_pcl *pcl) +{ + INIT_LIST_HEAD(&pcl->cl_chunks); +} + +/** + * pcl_is_empty - Return true if parsed chunk list is empty + * @pcl: parsed chunk list + * + */ +static inline bool pcl_is_empty(const struct svc_rdma_pcl *pcl) +{ + return list_empty(&pcl->cl_chunks); +} + +/** + * pcl_first_chunk - Return first chunk in a parsed chunk list + * @pcl: parsed chunk list + * + * Returns the first chunk in the list, or NULL if the list is empty. + */ +static inline struct svc_rdma_chunk * +pcl_first_chunk(const struct svc_rdma_pcl *pcl) +{ + if (pcl_is_empty(pcl)) + return NULL; + return list_first_entry(&pcl->cl_chunks, struct svc_rdma_chunk, + ch_list); +} + +/** + * pcl_next_chunk - Return next chunk in a parsed chunk list + * @pcl: a parsed chunk list + * @chunk: chunk in @pcl + * + * Returns the next chunk in the list, or NULL if @chunk is already last. + */ +static inline struct svc_rdma_chunk * +pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk) +{ + if (list_is_last(&chunk->ch_list, &pcl->cl_chunks)) + return NULL; + return list_next_entry(chunk, ch_list); +} + +/** + * pcl_for_each_chunk - Iterate over chunks in a parsed chunk list + * @pos: the loop cursor + * @pcl: a parsed chunk list + */ +#define pcl_for_each_chunk(pos, pcl) \ + for (pos = list_first_entry(&(pcl)->cl_chunks, struct svc_rdma_chunk, ch_list); \ + &pos->ch_list != &(pcl)->cl_chunks; \ + pos = list_next_entry(pos, ch_list)) + +/** + * pcl_for_each_segment - Iterate over segments in a parsed chunk + * @pos: the loop cursor + * @chunk: a parsed chunk + */ +#define pcl_for_each_segment(pos, chunk) \ + for (pos = &(chunk)->ch_segments[0]; \ + pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \ + pos++) + +/** + * pcl_chunk_end_offset - Return offset of byte range following @chunk + * @chunk: chunk in @pcl + * + * Returns starting offset of the region just after @chunk + */ +static inline unsigned int +pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk) +{ + return xdr_align_size(chunk->ch_position + chunk->ch_payload_length); +} + +struct svc_rdma_recv_ctxt; + +extern void pcl_free(struct svc_rdma_pcl *pcl); +extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_pcl *pcl, __be32 *p); +extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl, + const struct xdr_buf *xdr, + int (*actor)(const struct xdr_buf *, + void *), + void *data); + +#endif /* SVC_RDMA_PCL_H */ -- cgit v1.2.3 From 58b2e0fefa891c99f297120c8c062a35005dc562 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Mar 2020 13:06:55 -0400 Subject: svcrdma: Use parsed chunk lists to detect reverse direction replies Refactor: Don't duplicate header decoding smarts here. Instead, use the new parsed chunk lists. Note that the XID sanity test is also removed. The XID is already looked up by the cb handler, and is rejected if it's not recognized. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a89d4209fe2a..74247a33b6c6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -144,6 +144,7 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + __be32 rc_msgtype; struct svc_rdma_pcl rc_call_pcl; -- cgit v1.2.3 From 7a1cbfa18059a40d4752dab057384c3ca2de326c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 11:07:00 -0400 Subject: svcrdma: Use parsed chunk lists to construct RDMA Writes Refactor: Instead of re-parsing the ingress RPC Call transport header when constructing RDMA Writes, use the new parsed chunk lists for the Write list and Reply chunk, which are version-agnostic and already XDR-decoded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 74247a33b6c6..d9148787efff 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -157,8 +157,6 @@ struct svc_rdma_recv_ctxt { __be32 *rc_reply_chunk; struct svc_rdma_pcl rc_reply_pcl; - unsigned int rc_read_payload_offset; - unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; }; @@ -196,7 +194,8 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, const struct xdr_buf *xdr); + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); -- cgit v1.2.3 From 9d0b09d5ef0c842592a5df3a5b8b59124485ff1b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:11 -0400 Subject: svcrdma: Support multiple write chunks when pulling up When counting the number of SGEs needed to construct a Send request, do not count result payloads. And, when copying the Reply message into the pull-up buffer, result payloads are not to be copied to the Send buffer. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d9148787efff..7090af1a9791 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -182,6 +182,8 @@ extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern struct svc_rdma_recv_ctxt * + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From 2371bcc056647327445150d6df0502d92ad68439 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Mar 2020 13:29:27 -0400 Subject: svcrdma: Support multiple Write chunks in svc_rdma_map_reply_msg() Refactor: svc_rdma_map_reply_msg() is restructured to DMA map only the parts of rq_res that do not contain a result payload. This change has been tested to confirm that it does not cause a regression in the no Write chunk and single Write chunk cases. Multiple Write chunks have not been tested. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7090af1a9791..e09fafba00d7 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,7 +213,7 @@ extern int svc_rdma_send(struct svcxprt_rdma *rdma, extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct xdr_buf *xdr); extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, struct svc_rdma_recv_ctxt *rctxt, -- cgit v1.2.3 From 41bc163ffe0fe67cba3fff2f5e8c58caa9e46a1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Mar 2020 13:29:28 -0400 Subject: svcrdma: Support multiple Write chunks in svc_rdma_send_reply_chunk Refactor svc_rdma_send_reply_chunk() so that it Sends only the parts of rq_res that do not contain a result payload. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e09fafba00d7..85fbec47d4b5 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -200,7 +200,7 @@ extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct xdr_buf *xdr); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From 7954c8503b8709660d93505a40f1847634d9c3ba Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 15:19:36 -0400 Subject: svcrdma: Remove chunk list pointers Clean up: These pointers are no longer used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 85fbec47d4b5..6f247d043731 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -149,12 +149,8 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_pcl rc_call_pcl; struct svc_rdma_pcl rc_read_pcl; - - __be32 *rc_write_list; struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; - - __be32 *rc_reply_chunk; struct svc_rdma_pcl rc_reply_pcl; struct page *rc_pages[RPCSVC_MAXPAGES]; -- cgit v1.2.3 From d96962e6d0e281bab6a48e83b42f5dce6eb28bf4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Sep 2020 13:04:17 -0400 Subject: svcrdma: Use the new parsed chunk list when pulling Read chunks As a pre-requisite for handling multiple Read chunks in each Read list, convert svc_rdma_recv_read_chunk() to use the new parsed Read chunk list. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 6f247d043731..294b56e61522 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -188,15 +188,15 @@ extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, - struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_chunk *chunk, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, const struct xdr_buf *xdr); +extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From 0ae4c3e8a64ace1b8d7de033b0751afe43024416 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Nov 2020 15:52:47 -0500 Subject: SUNRPC: Add xdr_set_scratch_page() and xdr_reset_scratch_buffer() Clean up: De-duplicate some frequently-used code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ec2a22ccdc2a..2729d2d6efce 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -248,7 +248,6 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); -extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); @@ -256,6 +255,49 @@ extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned in extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +/** + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +static inline void +xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} + +/** + * xdr_set_scratch_page - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @page: an anonymous page + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) +{ + xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); +} + +/** + * xdr_reset_scratch_buffer - Clear scratch buffer information + * @xdr: pointer to xdr_stream struct + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_reset_scratch_buffer(struct xdr_stream *xdr) +{ + xdr_set_scratch_buffer(xdr, NULL, 0); +} + /** * xdr_stream_remaining - Return the number of bytes remaining in the stream * @xdr: pointer to struct xdr_stream -- cgit v1.2.3 From 5191955d6fc65e6d4efe8f4f10a6028298f57281 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 5 Nov 2020 11:19:42 -0500 Subject: SUNRPC: Prepare for xdr_stream-style decoding on the server-side A "permanent" struct xdr_stream is allocated in struct svc_rqst so that it is usable by all server-side decoders. A per-rqst scratch buffer is also allocated to handle decoding XDR data items that cross page boundaries. To demonstrate how it will be used, add the first call site for the new svcxdr_init_decode() API. As an additional part of the overall conversion, add symbolic constants for successful and failed XDR operations. Returning "0" is overloaded. Sometimes it means something failed, but sometimes it means success. To make it more clear when XDR decoding functions succeed or fail, introduce symbolic constants. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c220b734fa69..34c2a69820e9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -247,6 +247,8 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; + struct xdr_stream rq_arg_stream; + struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; struct page * *rq_respages; /* points into rq_pages */ @@ -557,4 +559,18 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) svc_reserve(rqstp, space + rqstp->rq_auth_slack); } +/** + * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_decode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct kvec *argv = rqstp->rq_arg.head; + + xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From c1346a1216ab5cb04a265380ac9035d91b16b6d5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 11:54:23 -0500 Subject: NFSD: Replace the internals of the READ_BUF() macro Convert the READ_BUF macro in nfs4xdr.c from open code to instead use the new xdr_stream-style decoders already in use by the encode side (and by the in-kernel NFS client implementation). Once this conversion is done, each individual NFSv4 argument decoder can be independently cleaned up to replace these macros with C code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2729d2d6efce..cc64cd0891f1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -254,6 +254,8 @@ extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, + unsigned int len); /** * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. -- cgit v1.2.3 From cbd9abb3706e96563b36af67595707a7054ab693 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:19:51 -0500 Subject: NFSD: Replace READ* macros in nfsd4_decode_commit() Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index cc64cd0891f1..cc669d95c484 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -571,6 +571,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) return 0; } +/** + * xdr_stream_decode_u64 - Decode a 64-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store 64-bit integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_hyper(p, ptr); + return 0; +} + /** * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 8918cc0d2b72db9997390626010b182c4500d749 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:16:52 -0500 Subject: NFSD: Add helper for decoding locker4 Refactor for clarity. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index cc669d95c484..9b35ce50cf2b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -550,6 +550,27 @@ static inline bool xdr_item_is_present(const __be32 *p) return *p != xdr_zero; } +/** + * xdr_stream_decode_bool - Decode a boolean + * @xdr: pointer to xdr_stream + * @ptr: pointer to a u32 in which to store the result + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = (*p != xdr_zero); + return 0; +} + /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 7fd3253a7de6a317a0683f83739479fb880bffc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 30 Nov 2020 19:51:56 +0100 Subject: net: Introduce preferred busy-polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing busy-polling mode, enabled by the SO_BUSY_POLL socket option or system-wide using the /proc/sys/net/core/busy_read knob, is an opportunistic. That means that if the NAPI context is not scheduled, it will poll it. If, after busy-polling, the budget is exceeded the busy-polling logic will schedule the NAPI onto the regular softirq handling. One implication of the behavior above is that a busy/heavy loaded NAPI context will never enter/allow for busy-polling. Some applications prefer that most NAPI processing would be done by busy-polling. This series adds a new socket option, SO_PREFER_BUSY_POLL, that works in concert with the napi_defer_hard_irqs and gro_flush_timeout knobs. The napi_defer_hard_irqs and gro_flush_timeout knobs were introduced in commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature"), and allows for a user to defer interrupts to be enabled and instead schedule the NAPI context from a watchdog timer. When a user enables the SO_PREFER_BUSY_POLL, again with the other knobs enabled, and the NAPI context is being processed by a softirq, the softirq NAPI processing will exit early to allow the busy-polling to be performed. If the application stops performing busy-polling via a system call, the watchdog timer defined by gro_flush_timeout will timeout, and regular softirq handling will resume. In summary; Heavy traffic applications that prefer busy-polling over softirq processing should use this option. Example usage: $ echo 2 | sudo tee /sys/class/net/ens785f1/napi_defer_hard_irqs $ echo 200000 | sudo tee /sys/class/net/ens785f1/gro_flush_timeout Note that the timeout should be larger than the userspace processing window, otherwise the watchdog will timeout and fall back to regular softirq processing. Enable the SO_BUSY_POLL/SO_PREFER_BUSY_POLL options on your socket. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20201130185205.196029-2-bjorn.topel@gmail.com --- include/linux/netdevice.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ce648a564f7..52d1cc2bd8a7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -350,23 +350,25 @@ struct napi_struct { }; enum { - NAPI_STATE_SCHED, /* Poll is scheduled */ - NAPI_STATE_MISSED, /* reschedule a napi */ - NAPI_STATE_DISABLE, /* Disable pending */ - NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_LISTED, /* NAPI added to system lists */ - NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ - NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ + NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ + NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ + NAPI_STATE_LISTED, /* NAPI added to system lists */ + NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ + NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ }; enum { - NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), - NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), - NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), - NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), - NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), }; enum gro_result { @@ -437,6 +439,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } +static inline bool napi_prefer_busy_poll(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); +} + bool napi_schedule_prep(struct napi_struct *n); /** -- cgit v1.2.3 From 147ad605dc12c515c97136899ccb5c70e6c674e1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:16 +0100 Subject: init: use type alignment for kernel parameters Specify type alignment for kernel parameters instead of sizeof(long). The alignment attribute is used to prevent gcc from increasing the alignment of objects with static extent as an optimisation, something which would mess up the __setup array stride. Using __alignof__(struct obs_kernel_param) rather than sizeof(long) is preferred since it better indicates why it is there and doesn't break should the type size or alignment change. Note that on m68k the alignment of struct obs_kernel_param is actually two and that adding a 1- or 2-byte field to the 12-byte struct would cause a breakage with the current 4-byte alignment. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/init.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 7b53cb3092ee..e668832ef66a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -255,7 +255,7 @@ struct obs_kernel_param { __aligned(1) = str; \ static struct obs_kernel_param __setup_##unique_id \ __used __section(".init.setup") \ - __attribute__((aligned((sizeof(long))))) \ + __aligned(__alignof__(struct obs_kernel_param)) \ = { __setup_str_##unique_id, fn, early } #define __setup(str, fn) \ -- cgit v1.2.3 From a2abe7cbd8fe2db5ff386c968e2273d9dc6c468d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 30 Nov 2020 15:34:41 -0800 Subject: scs: switch to vmapped shadow stacks The kernel currently uses kmem_cache to allocate shadow call stacks, which means an overflows may not be immediately detected and can potentially result in another task's shadow stack to be overwritten. This change switches SCS to use virtually mapped shadow stacks for tasks, which increases shadow stack size to a full page and provides more robust overflow detection, similarly to VMAP_STACK. Signed-off-by: Sami Tolvanen Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201130233442.2562064-2-samitolvanen@google.com Signed-off-by: Will Deacon --- include/linux/scs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scs.h b/include/linux/scs.h index 6dec390cf154..2a506c2a16f4 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -15,12 +15,8 @@ #ifdef CONFIG_SHADOW_CALL_STACK -/* - * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit - * architecture) provided ~40% safety margin on stack usage while keeping - * memory allocation overhead reasonable. - */ -#define SCS_SIZE SZ_1K +#define SCS_ORDER 0 +#define SCS_SIZE (PAGE_SIZE << SCS_ORDER) #define GFP_SCS (GFP_KERNEL | __GFP_ZERO) /* An illegal pointer value to mark the end of the shadow stack. */ @@ -33,6 +29,8 @@ #define task_scs(tsk) (task_thread_info(tsk)->scs_base) #define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) +void *scs_alloc(int node); +void scs_free(void *s); void scs_init(void); int scs_prepare(struct task_struct *tsk, int node); void scs_release(struct task_struct *tsk); @@ -61,6 +59,8 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk) #else /* CONFIG_SHADOW_CALL_STACK */ +static inline void *scs_alloc(int node) { return NULL; } +static inline void scs_free(void *s) {} static inline void scs_init(void) {} static inline void scs_task_reset(struct task_struct *tsk) {} static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } -- cgit v1.2.3 From ac20ffbb0279aae7be48567fb734eae7d050769e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 30 Nov 2020 15:34:42 -0800 Subject: arm64: scs: use vmapped IRQ and SDEI shadow stacks Use scs_alloc() to allocate also IRQ and SDEI shadow stacks instead of using statically allocated stacks. Signed-off-by: Sami Tolvanen Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201130233442.2562064-3-samitolvanen@google.com [will: Move CONFIG_SHADOW_CALL_STACK check into init_irq_scs()] Signed-off-by: Will Deacon --- include/linux/scs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scs.h b/include/linux/scs.h index 2a506c2a16f4..18122d9e17ff 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -22,10 +22,6 @@ /* An illegal pointer value to mark the end of the shadow stack. */ #define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) -/* Allocate a static per-CPU shadow stack */ -#define DEFINE_SCS(name) \ - DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ - #define task_scs(tsk) (task_thread_info(tsk)->scs_base) #define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) -- cgit v1.2.3 From 57d9352b6c651b090179f8b223b6681275c64a4f Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Sun, 15 Nov 2020 11:51:18 -0800 Subject: fpga: fpga-mgr: Add devm_fpga_mgr_register() API Add a devm_fpga_mgr_register() API that can be used to register a FPGA Manager that was created using devm_fpga_mgr_create(). Introduce a struct fpga_mgr_devres that makes the devres allocation a little bit more readable and gets reused for devm_fpga_mgr_create() devm_fpga_mgr_register(). Reviewed-by: Tom Rix Signed-off-by: Moritz Fischer Link: https://lore.kernel.org/r/20201115195127.284487-2-mdf@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index e8ca62b2cb5b..2bc3030a69e5 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -198,6 +198,8 @@ void fpga_mgr_free(struct fpga_manager *mgr); int fpga_mgr_register(struct fpga_manager *mgr); void fpga_mgr_unregister(struct fpga_manager *mgr); +int devm_fpga_mgr_register(struct device *dev, struct fpga_manager *mgr); + struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name, const struct fpga_manager_ops *mops, void *priv); -- cgit v1.2.3 From fa69ee5aa48b5b52e8028c2eb486906e9998d081 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Nov 2020 23:48:40 +0100 Subject: net: switch to storing KCOV handle directly in sk_buff It turns out that usage of skb extensions can cause memory leaks. Ido Schimmel reported: "[...] there are instances that blindly overwrite 'skb->extensions' by invoking skb_copy_header() after __alloc_skb()." Therefore, give up on using skb extensions for KCOV handle, and instead directly store kcov_handle in sk_buff. Fixes: 6370cc3bbd8a ("net: add kcov handle to skb extensions") Fixes: 85ce50d337d1 ("net: kcov: don't select SKB_EXTENSIONS when there is no NET") Fixes: 97f53a08cba1 ("net: linux/skbuff.h: combine SKB_EXTENSIONS + KCOV handling") Link: https://lore.kernel.org/linux-wireless/20201121160941.GA485907@shredder.lan/ Reported-by: Ido Schimmel Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20201125224840.2014773-1-elver@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0a1239819fd2..333bcdc39635 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -701,6 +701,7 @@ typedef unsigned char *sk_buff_data_t; * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header + * @kcov_handle: KCOV remote handle for remote coverage collection * @tail: Tail pointer * @end: End pointer * @head: Head of buffer @@ -904,6 +905,10 @@ struct sk_buff { __u16 network_header; __u16 mac_header; +#ifdef CONFIG_KCOV + u64 kcov_handle; +#endif + /* private: */ __u32 headers_end[0]; /* public: */ @@ -4150,9 +4155,6 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, -#endif -#if IS_ENABLED(CONFIG_KCOV) - SKB_EXT_KCOV_HANDLE, #endif SKB_EXT_NUM, /* must be last */ }; @@ -4608,35 +4610,22 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } -#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { - /* Do not allocate skb extensions only to set kcov_handle to zero - * (as it is zero by default). However, if the extensions are - * already allocated, update kcov_handle anyway since - * skb_set_kcov_handle can be called to zero a previously set - * value. - */ - if (skb_has_extensions(skb) || kcov_handle) { - u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); - - if (kcov_handle_ptr) - *kcov_handle_ptr = kcov_handle; - } +#ifdef CONFIG_KCOV + skb->kcov_handle = kcov_handle; +#endif } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { - u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); - - return kcov_handle ? *kcov_handle : 0; -} +#ifdef CONFIG_KCOV + return skb->kcov_handle; #else -static inline void skb_set_kcov_handle(struct sk_buff *skb, - const u64 kcov_handle) { } -static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } -#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ + return 0; +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 53ffabfd4ddb3a24c5603ae82eefb5537ecb5c20 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 9 Nov 2020 18:24:03 -0800 Subject: block: move blk_rq_bio_prep() to linux/blk-mq.h This is a preparation patch to have minimal block layer request bio append functionality in the context of the NVMeOF Passthru driver which falls in the fast path and doesn't need calls from blk_rq_append_bio(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- include/linux/blk-mq.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..e7482e6ad3ec 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -593,6 +593,18 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } +static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, + unsigned int nr_segs) +{ + rq->nr_phys_segments = nr_segs; + rq->__data_len = bio->bi_iter.bi_size; + rq->bio = rq->biotail = bio; + rq->ioprio = bio_prio(bio); + + if (bio->bi_disk) + rq->rq_disk = bio->bi_disk; +} + blk_qc_t blk_mq_submit_bio(struct bio *bio); #endif -- cgit v1.2.3 From b6f8ed33ab2bbc58e40fb1e2fb0f9c90cab04baf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Oct 2020 15:20:41 +0200 Subject: pstore/blk: remove {un,}register_pstore_blk This interface is entirely unused, so remove them and various bits of unreachable code. Signed-off-by: Christoph Hellwig Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201016132047.3068029-4-hch@lst.de --- include/linux/pstore_blk.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 61e914522b01..99564f93d774 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -7,48 +7,6 @@ #include #include -/** - * typedef pstore_blk_panic_write_op - panic write operation to block device - * - * @buf: the data to write - * @start_sect: start sector to block device - * @sects: sectors count on buf - * - * Return: On success, zero should be returned. Others excluding -ENOMSG - * mean error. -ENOMSG means to try next zone. - * - * Panic write to block device must be aligned to SECTOR_SIZE. - */ -typedef int (*pstore_blk_panic_write_op)(const char *buf, sector_t start_sect, - sector_t sects); - -/** - * struct pstore_blk_info - pstore/blk registration details - * - * @major: Which major device number to support with pstore/blk - * @flags: The supported PSTORE_FLAGS_* from linux/pstore.h. - * @panic_write:The write operation only used for the panic case. - * This can be NULL, but is recommended to avoid losing - * crash data if the kernel's IO path or work queues are - * broken during a panic. - * @devt: The dev_t that pstore/blk has attached to. - * @nr_sects: Number of sectors on @devt. - * @start_sect: Starting sector on @devt. - */ -struct pstore_blk_info { - unsigned int major; - unsigned int flags; - pstore_blk_panic_write_op panic_write; - - /* Filled in by pstore/blk after registration. */ - dev_t devt; - sector_t nr_sects; - sector_t start_sect; -}; - -int register_pstore_blk(struct pstore_blk_info *info); -void unregister_pstore_blk(unsigned int major); - /** * struct pstore_device_info - back-end pstore/blk driver structure. * -- cgit v1.2.3 From 60b498852bf219c0bf2b0864c69972840978ca43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Nov 2020 15:21:18 +0100 Subject: fs: remove get_super_thawed and get_super_exclusive_thawed Just open code the wait in the only caller of both functions. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..a61df0dd4f19 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1409,7 +1409,7 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ + wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -3132,8 +3132,6 @@ extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -extern struct super_block *get_super_thawed(struct block_device *); -extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -- cgit v1.2.3 From 040f04bd2e825f1d80b14a0e0ac3d830339eb779 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 11:54:06 +0100 Subject: fs: simplify freeze_bdev/thaw_bdev Store the frozen superblock in struct block_device to avoid the awkward interface that can return a sb only used a cookie, an ERR_PTR or NULL. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d9b69bbde5cc..ebfb4e7c1fd1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -46,6 +46,7 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; + struct super_block *bd_fsfreeze_sb; } __randomize_layout; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 05b346a68c2e..12810a19edeb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2020,7 +2020,7 @@ static inline int sync_blockdev(struct block_device *bdev) #endif int fsync_bdev(struct block_device *bdev); -struct super_block *freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev, struct super_block *sb); +int freeze_bdev(struct block_device *bdev); +int thaw_bdev(struct block_device *bdev); #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From b601d148a16ea16dfbaf3600be35ee175847a09b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Nov 2020 16:21:52 +0100 Subject: block: remove a duplicate __disk_get_part prototype Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 46553d6d6025..22f5b9fd96f8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -250,7 +250,6 @@ static inline dev_t part_devt(struct hd_struct *part) return part_to_dev(part)->devt; } -extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) -- cgit v1.2.3 From 8d65269fe8065fee889bca5b204d711b0695a8f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Nov 2020 08:18:55 +0100 Subject: block: add a bdev_kobj helper Add a little helper to find the kobject for a struct block_device. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Tejun Heo Acked-by: Coly Li [bcache] Acked-by: David Sterba [btrfs] Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ebfb4e7c1fd1..9698f459cc65 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,6 +49,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; } __randomize_layout; +#define bdev_kobj(_bdev) \ + (&part_to_dev((_bdev)->bd_part)->kobj) + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. -- cgit v1.2.3 From c2637e80a09e0d6c698d2771d7230f59c2138122 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 14 Nov 2020 19:19:57 +0100 Subject: init: refactor name_to_dev_t Split each case into a self-contained helper, and move the block dependent code entirely under the pre-existing #ifdef CONFIG_BLOCK. This allows to remove the blk_lookup_devt stub in genhd.h. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 22f5b9fd96f8..ca5e356084c3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -388,18 +388,13 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +dev_t blk_lookup_devt(const char *name, int partno); #ifdef CONFIG_BLOCK void printk_all_partitions(void); -dev_t blk_lookup_devt(const char *name, int partno); #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int partno) -{ - dev_t devt = MKDEV(0, 0); - return devt; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ -- cgit v1.2.3 From 4e7b5671c6a883d94b5428e1a9c141bbd56cb2a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 13:38:40 +0100 Subject: block: remove i_bdev Switch the block device lookup interfaces to directly work with a dev_t so that struct block_device references are only acquired by the blkdev_get variants (and the blk-cgroup special case). This means that we now don't need an extra reference in the inode and can generally simplify handling of struct block_device to keep the lookups contained in the core block layer code. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Acked-by: Coly Li [bcache] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12810a19edeb..bdd7339bcda4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1973,7 +1973,7 @@ int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); const char *bdevname(struct block_device *bdev, char *buffer); -struct block_device *lookup_bdev(const char *); +int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); diff --git a/include/linux/fs.h b/include/linux/fs.h index a61df0dd4f19..b0b358309657 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -696,7 +696,6 @@ struct inode { struct list_head i_devices; union { struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; -- cgit v1.2.3 From 22ae8ce8b89241c94ac00c237752c0ffa37ba5ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 09:23:26 +0100 Subject: block: simplify bdev/disk lookup in blkdev_get To simplify block device lookup and a few other upcoming areas, make sure that we always have a struct block_device available for each disk and each partition, and only find existing block devices in bdget. The only downside of this is that each device and partition uses a little more memory. The upside will be that a lot of code can be simplified. With that all we need to look up the block device is to lookup the inode and do a few sanity checks on the gendisk, instead of the separate lookup for the gendisk. For blk-cgroup which wants to access a gendisk without opening it, a new blkdev_{get,put}_no_open low-level interface is added to replace the previous get_gendisk use. Note that the change to look up block device directly instead of the two step lookup using struct gendisk causes a subtile change in behavior: accessing a non-existing partition on an existing block device can now cause a call to request_module. That call is harmless, and in practice no recent system will access these nodes as they aren't created by udev and static /dev/ setups are unusual. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 ++-- include/linux/blkdev.h | 6 ++++++ include/linux/genhd.h | 7 ++++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c8fc9792ac77..b9f3c246c3c9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { - struct gendisk *disk; + struct block_device *bdev; struct blkcg_gq *blkg; char *body; }; -struct gendisk *blkcg_conf_get_disk(char **inputp); +struct block_device *blkcg_conf_open_bdev(char **inputp); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdd7339bcda4..5d48b92f5e43 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1994,6 +1994,12 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); +/* just for blk-cgroup, don't use elsewhere */ +struct block_device *blkdev_get_no_open(dev_t dev); +void blkdev_put_no_open(struct block_device *bdev); + +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); +void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ca5e356084c3..42a51653c730 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,6 +65,7 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; + struct block_device *bdev; struct device __dev; struct kobject *holder_dir; int policy, partno; @@ -193,7 +194,6 @@ struct gendisk { int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 - struct rw_semaphore lookup_sem; struct kobject *slave_dir; struct timer_rand_state *random; @@ -300,7 +300,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -338,7 +337,6 @@ int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern void put_disk(struct gendisk *disk); -extern void put_disk_and_module(struct gendisk *disk); #define alloc_disk_node(minors, node_id) \ ({ \ @@ -388,7 +386,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +extern struct rw_semaphore bdev_lookup_sem; + dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK void printk_all_partitions(void); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From a954ea812018a84d350b316c39a2be3edc4b7ca8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 13:29:55 +0100 Subject: block: remove ->bd_contains Now that each hd_struct has a reference to the corresponding block_device, there is no need for the bd_contains pointer. Add a bdev_whole() helper to look up the whole device block_device struture instead. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9698f459cc65..2e0a9bd9688d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -32,7 +32,6 @@ struct block_device { #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif - struct block_device * bd_contains; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ @@ -49,6 +48,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; } __randomize_layout; +#define bdev_whole(_bdev) \ + ((_bdev)->bd_disk->part0.bdev) + #define bdev_kobj(_bdev) \ (&part_to_dev((_bdev)->bd_part)->kobj) -- cgit v1.2.3 From 37c3fc9abb25cd767ad5b048358336ac89488c16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Nov 2020 21:20:08 +0100 Subject: block: simplify the block device claiming interface Stop passing the whole device as a separate argument given that it can be trivially deducted and cleanup the !holder debug check. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5d48b92f5e43..43a25d855e04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1988,10 +1988,8 @@ void blkdev_show(struct seq_file *seqf, off_t offset); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder); -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder); +int bd_prepare_to_claim(struct block_device *bdev, void *holder); +void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); /* just for blk-cgroup, don't use elsewhere */ -- cgit v1.2.3 From c64dc3bd87097e7f08b9437819440f8bfddef995 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:38:15 +0100 Subject: block: simplify part_to_disk Now that struct hd_struct has a block_device pointer use that to find the disk. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 42a51653c730..6ba91ee54cb2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -218,13 +218,9 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { - if (likely(part)) { - if (part->partno) - return dev_to_disk(part_to_dev(part)->parent); - else - return dev_to_disk(part_to_dev(part)); - } - return NULL; + if (unlikely(!part)) + return NULL; + return part->bdev->bd_disk; } static inline int disk_max_parts(struct gendisk *disk) -- cgit v1.2.3 From a782483cc1f875355690625d8253a232f2581418 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:43:37 +0100 Subject: block: remove the nr_sects field in struct hd_struct Now that the hd_struct always has a block device attached to it, there is no need for having two size field that just get out of sync. Additionally the field in hd_struct did not use proper serialization, possibly allowing for torn writes. By only using the block_device field this problem also gets fixed. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- include/linux/genhd.h | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6ba91ee54cb2..30d4785b7df8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -52,15 +52,6 @@ struct partition_meta_info { struct hd_struct { sector_t start_sect; - /* - * nr_sects is protected by sequence counter. One might extend a - * partition while IO is happening to it and update of nr_sects - * can be non-atomic on 32bit machines with 64bit sector_t. - */ - sector_t nr_sects; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_t nr_sects_seq; -#endif unsigned long stamp; struct disk_stats __percpu *dkstats; struct percpu_ref ref; @@ -254,13 +245,6 @@ static inline void disk_put_part(struct hd_struct *part) put_device(part_to_dev(part)); } -static inline void hd_sects_seq_init(struct hd_struct *p) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_init(&p->nr_sects_seq); -#endif -} - /* * Smarter partition iterator without context limits. */ @@ -318,13 +302,15 @@ static inline sector_t get_start_sect(struct block_device *bdev) { return bdev->bd_part->start_sect; } -static inline sector_t get_capacity(struct gendisk *disk) + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) { - return disk->part0.nr_sects; + return i_size_read(bdev->bd_inode) >> 9; } -static inline void set_capacity(struct gendisk *disk, sector_t size) + +static inline sector_t get_capacity(struct gendisk *disk) { - disk->part0.nr_sects = size; + return bdev_nr_sectors(disk->part0.bdev); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); @@ -358,10 +344,9 @@ int __register_blkdev(unsigned int major, const char *name, __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); -void revalidate_disk_size(struct gendisk *disk, bool verbose); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); +void set_capacity(struct gendisk *disk, sector_t size); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -- cgit v1.2.3 From 15e3d2c5cd53298272e59ad9072d3468f9dd3781 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:34:00 +0100 Subject: block: move disk stat accounting to struct block_device Move the dkstats and stamp field to struct block_device in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/genhd.h | 2 -- include/linux/part_stat.h | 38 +++++++++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2e0a9bd9688d..520011b95276 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,6 +20,8 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + struct disk_stats __percpu *bd_stats; + unsigned long bd_stamp; dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 30d4785b7df8..804ac45fbfbc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -52,8 +52,6 @@ struct partition_meta_info { struct hd_struct { sector_t start_sect; - unsigned long stamp; - struct disk_stats __percpu *dkstats; struct percpu_ref ref; struct block_device *bdev; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 24125778ef3e..87ad60106e1d 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,17 +25,17 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) + (per_cpu_ptr((part)->bdev->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->dkstats->field) res = 0; \ + typeof((part)->bdev->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res += per_cpu_ptr((part)->bdev->bd_stats, _cpu)->field; \ res; \ }) @@ -44,7 +44,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, + memset(per_cpu_ptr(part->bdev->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,7 +54,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->dkstats->field, addnd) + __this_cpu_add((part)->bdev->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ @@ -63,20 +63,20 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) field, addnd); \ } while (0) -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) +#define part_stat_dec(part, field) \ + part_stat_add(part, field, -1) +#define part_stat_inc(part, field) \ + part_stat_add(part, field, 1) +#define part_stat_sub(part, field, subnd) \ + part_stat_add(part, field, -subnd) -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) +#define part_stat_local_dec(part, field) \ + local_dec(&(part_stat_get(part, field))) +#define part_stat_local_inc(part, field) \ + local_inc(&(part_stat_get(part, field))) +#define part_stat_local_read(part, field) \ + local_read(&(part_stat_get(part, field))) +#define part_stat_local_read_cpu(part, field, cpu) \ + local_read(&(part_stat_get_cpu(part, field, cpu))) #endif /* _LINUX_PART_STAT_H */ -- cgit v1.2.3 From 29ff57c61094e7bbd921ab10b5a99dce9a0132e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:34:24 +0100 Subject: block: move the start_sect field to struct block_device Move the start_sect field to struct block_device in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 4 ++-- include/linux/genhd.h | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 520011b95276..a690008f60cd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,6 +20,7 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + sector_t bd_start_sect; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; dev_t bd_dev; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 43a25d855e04..619adea57098 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1488,7 +1488,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.alignment_offset; } @@ -1529,7 +1529,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.discard_alignment; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 804ac45fbfbc..50d27f5d38e2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -51,7 +51,6 @@ struct partition_meta_info { }; struct hd_struct { - sector_t start_sect; struct percpu_ref ref; struct block_device *bdev; @@ -298,7 +297,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_part->start_sect; + return bdev->bd_start_sect; } static inline sector_t bdev_nr_sectors(struct block_device *bdev) -- cgit v1.2.3 From 231926dbf0f084211e4ec4f4c006f0bf1f47809a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 12:01:45 +0100 Subject: block: move the partition_meta_info to struct block_device Move the partition_meta_info to struct block_device in preparation for killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/genhd.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a690008f60cd..2f8ede04e5a9 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,6 +49,8 @@ struct block_device { /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; struct super_block *bd_fsfreeze_sb; + + struct partition_meta_info *bd_meta_info; } __randomize_layout; #define bdev_whole(_bdev) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 50d27f5d38e2..30d7076155b4 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -57,7 +57,6 @@ struct hd_struct { struct device __dev; struct kobject *holder_dir; int policy, partno; - struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif -- cgit v1.2.3 From 1bdd5ae0251d678488dffcf455d4633c2beef1bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 19:00:13 +0100 Subject: block: move holder_dir to struct block_device Move the holder_dir field to struct block_device in preparation for kill struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/genhd.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2f8ede04e5a9..c0591e52d7d7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -35,6 +35,7 @@ struct block_device { #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif + struct kobject *bd_holder_dir; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 30d7076155b4..b4a5c05593b9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,7 +55,6 @@ struct hd_struct { struct block_device *bdev; struct device __dev; - struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; -- cgit v1.2.3 From b309e9936347232c724eaa13f70533128b4864e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:28:47 +0100 Subject: block: move make_it_fail to struct block_device Move the make_it_fail flag to struct block_device an turn it into a bool in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +++ include/linux/genhd.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c0591e52d7d7..b237f1e40814 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -52,6 +52,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; struct partition_meta_info *bd_meta_info; +#ifdef CONFIG_FAIL_MAKE_REQUEST + bool bd_make_it_fail; +#endif } __randomize_layout; #define bdev_whole(_bdev) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b4a5c05593b9..349cf6403ccd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -56,9 +56,6 @@ struct hd_struct { struct block_device *bdev; struct device __dev; int policy, partno; -#ifdef CONFIG_FAIL_MAKE_REQUEST - int make_it_fail; -#endif struct rcu_work rcu_work; }; -- cgit v1.2.3 From 83950d359010a493462d58c712b1124c877d1b3b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:36:02 +0100 Subject: block: move the policy field to struct block_device Move the policy field to struct block_device and rename it to the more descriptive bd_read_only. Also turn the field into a bool as it is used as such. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/genhd.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b237f1e40814..758cf71c9aa2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -23,6 +23,7 @@ struct block_device { sector_t bd_start_sect; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; + bool bd_read_only; /* read-only policy */ dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 349cf6403ccd..dcbf9ef7610e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,7 +55,7 @@ struct hd_struct { struct block_device *bdev; struct device __dev; - int policy, partno; + int partno; struct rcu_work rcu_work; }; @@ -278,7 +278,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0.bdev->bd_read_only; } extern void disk_block_events(struct gendisk *disk); -- cgit v1.2.3 From cb8432d650fe3be58bb962bc8e602dc405510327 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:47:17 +0100 Subject: block: allocate struct hd_struct as part of struct bdev_inode Allocate hd_struct together with struct block_device to pre-load the lifetime rule changes in preparation of merging the two structures. Note that part0 was previously embedded into struct gendisk, but is a separate allocation now, and already points to the block_device instead of the hd_struct. The lifetime of struct gendisk is still controlled by the struct device embedded in the part0 hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- include/linux/genhd.h | 14 ++++++-------- include/linux/part_stat.h | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 758cf71c9aa2..6edea5c16259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -59,7 +59,7 @@ struct block_device { } __randomize_layout; #define bdev_whole(_bdev) \ - ((_bdev)->bd_disk->part0.bdev) + ((_bdev)->bd_disk->part0) #define bdev_kobj(_bdev) \ (&part_to_dev((_bdev)->bd_part)->kobj) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index dcbf9ef7610e..df7319da013c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,11 +19,12 @@ #include #include -#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) #define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) +#define dev_to_disk(device) (dev_to_part(device)->bdev->bd_disk) +#define disk_to_dev(disk) (part_to_dev((disk)->part0->bd_part)) + extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -51,12 +52,9 @@ struct partition_meta_info { }; struct hd_struct { - struct percpu_ref ref; - struct block_device *bdev; struct device __dev; int partno; - struct rcu_work rcu_work; }; /** @@ -168,7 +166,7 @@ struct gendisk { * helpers. */ struct disk_part_tbl __rcu *part_tbl; - struct hd_struct part0; + struct block_device *part0; const struct block_device_operations *fops; struct request_queue *queue; @@ -278,7 +276,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.bdev->bd_read_only; + return disk->part0->bd_read_only; } extern void disk_block_events(struct gendisk *disk); @@ -302,7 +300,7 @@ static inline sector_t bdev_nr_sectors(struct block_device *bdev) static inline sector_t get_capacity(struct gendisk *disk) { - return bdev_nr_sectors(disk->part0.bdev); + return bdev_nr_sectors(disk->part0); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 87ad60106e1d..680de036691e 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -59,8 +59,8 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ + __part_stat_add(part_to_disk((part))->part0->bd_part, \ + field, addnd); \ } while (0) #define part_stat_dec(part, field) \ -- cgit v1.2.3 From 8446fe9255be821cb38ffd306d7e8edc4b9ea662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:36:54 +0100 Subject: block: switch partition lookup to use struct block_device Use struct block_device to lookup partitions on a disk. This removes all usage of struct hd_struct from the I/O path. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- include/linux/genhd.h | 4 ++-- include/linux/part_stat.h | 17 ++++++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 619adea57098..1d4be1fc6007 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -191,7 +191,7 @@ struct request { }; struct gendisk *rq_disk; - struct hd_struct *part; + struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; @@ -1943,9 +1943,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, - struct bio *bio); -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +unsigned long part_start_io_acct(struct gendisk *disk, + struct block_device **part, struct bio *bio); +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time); /** diff --git a/include/linux/genhd.h b/include/linux/genhd.h index df7319da013c..fe6fee77e2b9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -131,8 +131,8 @@ enum { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct __rcu *last_lookup; - struct hd_struct __rcu *part[]; + struct block_device __rcu *last_lookup; + struct block_device __rcu *part[]; }; struct disk_events; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 680de036691e..d2558121d48c 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,26 +25,26 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->bdev->bd_stats, (cpu))->field) + (per_cpu_ptr((part)->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->bdev->bd_stats->field) res = 0; \ + typeof((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->bdev->bd_stats, _cpu)->field; \ + res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ res; \ }) -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void part_stat_set_all(struct block_device *part, int value) { int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->bdev->bd_stats, i), value, + memset(per_cpu_ptr(part->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,13 +54,12 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->bdev->bd_stats->field, addnd) + __this_cpu_add((part)->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(part_to_disk((part))->part0->bd_part, \ - field, addnd); \ + if ((part)->bd_partno) \ + __part_stat_add(bdev_whole(part), field, addnd); \ } while (0) #define part_stat_dec(part, field) \ -- cgit v1.2.3 From 41e5c81984eac8ce87f2b4f57fec0bd90a049b2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:37:14 +0100 Subject: block: remove the partno field from struct hd_struct Just use the bd_partno field in struct block_device everywhere. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fe6fee77e2b9..3c13d4708e3f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -54,7 +54,6 @@ struct partition_meta_info { struct hd_struct { struct block_device *bdev; struct device __dev; - int partno; }; /** -- cgit v1.2.3 From ad1eaa5344b293552b6ba43f5709c76a9aa14d17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:52:59 +0100 Subject: block: switch disk_part_iter_* to use a struct block_device Switch the partition iter infrastructure to iterate over block_device references instead of hd_struct ones mostly used to get at the block_device. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c13d4708e3f..cd23c80265b2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -244,14 +244,14 @@ static inline void disk_put_part(struct hd_struct *part) struct disk_part_iter { struct gendisk *disk; - struct hd_struct *part; + struct block_device *part; int idx; unsigned int flags; }; extern void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, unsigned int flags); -extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +struct block_device *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); extern bool disk_has_partitions(struct gendisk *disk); -- cgit v1.2.3 From 0d02129e76edf91cf04fabf1efbc3a9a1f1d729a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Nov 2020 16:43:51 +0100 Subject: block: merge struct block_device and struct hd_struct Instead of having two structures that represent each block device with different life time rules, merge them into a single one. This also greatly simplifies the reference counting rules, as we can use the inode reference count as the main reference count for the new struct block_device, with the device model reference front ending it for device model interaction. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++++-- include/linux/blkdev.h | 1 - include/linux/genhd.h | 40 +++++++++------------------------------- 3 files changed, 15 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6edea5c16259..866f74261b3b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include #include +#include #include struct bio_set; @@ -30,6 +31,7 @@ struct block_device { struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; + struct device bd_device; void * bd_holder; int bd_holders; bool bd_write_holder; @@ -38,7 +40,6 @@ struct block_device { #endif struct kobject *bd_holder_dir; u8 bd_partno; - struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; @@ -61,8 +62,11 @@ struct block_device { #define bdev_whole(_bdev) \ ((_bdev)->bd_disk->part0) +#define dev_to_bdev(device) \ + container_of((device), struct block_device, bd_device) + #define bdev_kobj(_bdev) \ - (&part_to_dev((_bdev)->bd_part)->kobj) + (&((_bdev)->bd_device.kobj)) /* * Block error status values. See block/blk-core:blk_errors for the details. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d4be1fc6007..17cedf0dc83d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1999,7 +1999,6 @@ void blkdev_put_no_open(struct block_device *bdev); struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index cd23c80265b2..809aaa32d53c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,12 +19,6 @@ #include #include -#define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define part_to_dev(part) (&((part)->__dev)) - -#define dev_to_disk(device) (dev_to_part(device)->bdev->bd_disk) -#define disk_to_dev(disk) (part_to_dev((disk)->part0->bd_part)) - extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -51,11 +45,6 @@ struct partition_meta_info { u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; -struct hd_struct { - struct block_device *bdev; - struct device __dev; -}; - /** * DOC: genhd capability flags * @@ -190,19 +179,21 @@ struct gendisk { struct lockdep_map lockdep_map; }; +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + #if IS_REACHABLE(CONFIG_CDROM) #define disk_to_cdi(disk) ((disk)->cdi) #else #define disk_to_cdi(disk) NULL #endif -static inline struct gendisk *part_to_disk(struct hd_struct *part) -{ - if (unlikely(!part)) - return NULL; - return part->bdev->bd_disk; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -221,19 +212,6 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } -static inline dev_t part_devt(struct hd_struct *part) -{ - return part_to_dev(part)->devt; -} - -extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); - -static inline void disk_put_part(struct hd_struct *part) -{ - if (likely(part)) - put_device(part_to_dev(part)); -} - /* * Smarter partition iterator without context limits. */ -- cgit v1.2.3 From c214550ff8eae9623605149fa4e3da3ba43df145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 29 Nov 2020 22:05:50 +0200 Subject: net: delete __dev_getfirstbyhwtype The last user of the RTNL brother of dev_getfirstbyhwtype (the latter being synchronized under RCU) has been deleted in commit b4db2b35fc44 ("afs: Use core kernel UUID generation"). Cc: Arnd Bergmann Cc: David Howells Cc: Eric Dumazet Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201129200550.2433401-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8eeb73ac58bd..2e077e289e75 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2815,7 +2815,6 @@ unsigned long netdev_boot_base(const char *prefix, int unit); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); -struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); void dev_add_pack(struct packet_type *pt); void dev_remove_pack(struct packet_type *pt); void __dev_remove_pack(struct packet_type *pt); -- cgit v1.2.3 From 1446e1df9eb183fdf81c3f0715402f1d7595d4cb Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 27 Nov 2020 14:32:34 -0500 Subject: kernel: Implement selective syscall userspace redirection Introduce a mechanism to quickly disable/enable syscall handling for a specific process and redirect to userspace via SIGSYS. This is useful for processes with parts that require syscall redirection and parts that don't, but who need to perform this boundary crossing really fast, without paying the cost of a system call to reconfigure syscall handling on each boundary transition. This is particularly important for Windows games running over Wine. The proposed interface looks like this: prctl(PR_SET_SYSCALL_USER_DISPATCH, , , , [selector]) The range [,+) is a part of the process memory map that is allowed to by-pass the redirection code and dispatch syscalls directly, such that in fast paths a process doesn't need to disable the trap nor the kernel has to check the selector. This is essential to return from SIGSYS to a blocked area without triggering another SIGSYS from rt_sigreturn. selector is an optional pointer to a char-sized userspace memory region that has a key switch for the mechanism. This key switch is set to either PR_SYS_DISPATCH_ON, PR_SYS_DISPATCH_OFF to enable and disable the redirection without calling the kernel. The feature is meant to be set per-thread and it is disabled on fork/clone/execv. Internally, this doesn't add overhead to the syscall hot path, and it requires very little per-architecture support. I avoided using seccomp, even though it duplicates some functionality, due to previous feedback that maybe it shouldn't mix with seccomp since it is not a security mechanism. And obviously, this should never be considered a security mechanism, since any part of the program can by-pass it by using the syscall dispatcher. For the sysinfo benchmark, which measures the overhead added to executing a native syscall that doesn't require interception, the overhead using only the direct dispatcher region to issue syscalls is pretty much irrelevant. The overhead of using the selector goes around 40ns for a native (unredirected) syscall in my system, and it is (as expected) dominated by the supervisor-mode user-address access. In fact, with SMAP off, the overhead is consistently less than 5ns on my test box. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Link: https://lore.kernel.org/r/20201127193238.821364-4-krisman@collabora.com --- include/linux/sched.h | 2 ++ include/linux/syscall_user_dispatch.h | 40 +++++++++++++++++++++++++++++++++++ include/linux/thread_info.h | 2 ++ 3 files changed, 44 insertions(+) create mode 100644 include/linux/syscall_user_dispatch.h (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 063cd120b459..5a24a033b3f8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -965,6 +966,7 @@ struct task_struct { unsigned int sessionid; #endif struct seccomp seccomp; + struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h new file mode 100644 index 000000000000..a0ae443fb7df --- /dev/null +++ b/include/linux/syscall_user_dispatch.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Collabora Ltd. + */ +#ifndef _SYSCALL_USER_DISPATCH_H +#define _SYSCALL_USER_DISPATCH_H + +#include + +#ifdef CONFIG_GENERIC_ENTRY + +struct syscall_user_dispatch { + char __user *selector; + unsigned long offset; + unsigned long len; + bool on_dispatch; +}; + +int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, + unsigned long len, char __user *selector); + +#define clear_syscall_work_syscall_user_dispatch(tsk) \ + clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH) + +#else +struct syscall_user_dispatch {}; + +static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, + unsigned long len, char __user *selector) +{ + return -EINVAL; +} + +static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *tsk) +{ +} + +#endif /* CONFIG_GENERIC_ENTRY */ + +#endif /* _SYSCALL_USER_DISPATCH_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ca80a214df09..c8a974cead73 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,6 +42,7 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_TRACE, SYSCALL_WORK_BIT_SYSCALL_EMU, SYSCALL_WORK_BIT_SYSCALL_AUDIT, + SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) @@ -49,6 +50,7 @@ enum syscall_work_bit { #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) +#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) #endif #include -- cgit v1.2.3 From 11894468e39def270199f845b76df6c36d4ed133 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 27 Nov 2020 14:32:35 -0500 Subject: entry: Support Syscall User Dispatch on common syscall entry Syscall User Dispatch (SUD) must take precedence over seccomp and ptrace, since the use case is emulation (it can be invoked with a different ABI) such that seccomp filtering by syscall number doesn't make sense in the first place. In addition, either the syscall is dispatched back to userspace, in which case there is no resource for to trace, or the syscall will be executed, and seccomp/ptrace will execute next. Since SUD runs before tracepoints, it needs to be a SYSCALL_WORK_EXIT as well, just to prevent a trace exit event when dispatch was triggered. For that, the on_syscall_dispatch() examines context to skip the tracepoint, audit and other work. [ tglx: Add a comment on the exit side ] Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Link: https://lore.kernel.org/r/20201127193238.821364-5-krisman@collabora.com --- include/linux/entry-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 49b26b216e4e..a6e98b4ba8e9 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -44,10 +44,12 @@ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_EMU | \ SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ ARCH_SYSCALL_WORK_ENTER) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ ARCH_SYSCALL_WORK_EXIT) /* -- cgit v1.2.3 From 96e2fbccd0fc806364a964fdf072bfc858a66109 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 1 Dec 2020 15:27:53 +0100 Subject: entry_Add_enter_from_user_mode_wrapper To be called from architecture specific code if the combo interfaces are not suitable. It simply calls __enter_from_user_mode(). This way __enter_from_user_mode will still be inlined because it is declared static __always_inline. [ tglx: Amend comments and move it to a different location in the header ] Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201201142755.31931-4-svens@linux.ibm.com --- include/linux/entry-common.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a6e98b4ba8e9..da60980a2e7b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -101,6 +101,27 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs } #endif +/** + * enter_from_user_mode - Establish state when coming from user mode + * + * Syscall/interrupt entry disables interrupts, but user mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct and interrupts are still + * disabled. The subsequent functions can be instrumented. + * + * This is invoked when there is architecture specific functionality to be + * done between establishing state and enabling interrupts. The caller must + * enable interrupts before invoking syscall_enter_from_user_mode_work(). + */ +void enter_from_user_mode(struct pt_regs *regs); + /** * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs @@ -110,7 +131,8 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * function returns all state is correct, interrupts are enabled and the * subsequent functions can be instrumented. * - * This handles lockdep, RCU (context tracking) and tracing state. + * This handles lockdep, RCU (context tracking) and tracing state, i.e. + * the functionality provided by enter_from_user_mode(). * * This is invoked when there is extra architecture specific functionality * to be done between establishing state and handling user mode entry work. -- cgit v1.2.3 From 310de1a678b2184c078c593dae343cb79c807f8d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 1 Dec 2020 15:27:54 +0100 Subject: entry: Add exit_to_user_mode() wrapper Called from architecture specific code when syscall_exit_to_user_mode() is not suitable. It simply calls __exit_to_user_mode(). This way __exit_to_user_mode() can still be inlined because it is declared static __always_inline. [ tglx: Amended comments and moved it to a different place in the header ] Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201201142755.31931-5-svens@linux.ibm.com --- include/linux/entry-common.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index da60980a2e7b..e370be8121aa 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -300,6 +300,25 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) } #endif +/** + * exit_to_user_mode - Fixup state when exiting to user mode + * + * Syscall/interrupt exit enables interrupts, but the kernel state is + * interrupts disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Invoke architecture specific last minute exit code, e.g. speculation + * mitigations, etc.: arch_exit_to_user_mode() + * 4) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code when syscall_exit_to_user_mode() + * is not suitable as the last step before returning to userspace. Must be + * invoked with interrupts disabled and the caller must be + * non-instrumentable. + */ +void exit_to_user_mode(void); + /** * syscall_exit_to_user_mode - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -322,8 +341,8 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) * - Architecture specific one time work arch_exit_to_user_mode_prepare() * - Address limit and lockdep checks * - * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes - * arch_exit_to_user_mode() to handle e.g. speculation mitigations + * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the + * functionality in exit_to_user_mode(). */ void syscall_exit_to_user_mode(struct pt_regs *regs); -- cgit v1.2.3 From c6156e1da633f241e132eaea3b676d674376d770 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 1 Dec 2020 15:27:55 +0100 Subject: entry: Add syscall_exit_to_user_mode_work() This is the same as syscall_exit_to_user_mode() but without calling exit_to_user_mode(). This can be used if there is an architectural reason to avoid the combo function, e.g. restarting a syscall without returning to userspace. Before returning to user space the caller has to invoke exit_to_user_mode(). [ tglx: Amended comments ] Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201201142755.31931-6-svens@linux.ibm.com --- include/linux/entry-common.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index e370be8121aa..7c581a4c3797 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -316,9 +316,25 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) * is not suitable as the last step before returning to userspace. Must be * invoked with interrupts disabled and the caller must be * non-instrumentable. + * The caller has to invoke syscall_exit_to_user_mode_work() before this. */ void exit_to_user_mode(void); +/** + * syscall_exit_to_user_mode_work - Handle work before returning to user mode + * @regs: Pointer to currents pt_regs + * + * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling + * exit_to_user_mode() to perform the final transition to user mode. + * + * Calling convention is the same as for syscall_exit_to_user_mode() and it + * returns with all work handled and interrupts disabled. The caller must + * invoke exit_to_user_mode() before actually switching to user mode to + * make the final state transitions. Interrupts must stay disabled between + * return from this function and the invocation of exit_to_user_mode(). + */ +void syscall_exit_to_user_mode_work(struct pt_regs *regs); + /** * syscall_exit_to_user_mode - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -343,6 +359,10 @@ void exit_to_user_mode(void); * * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the * functionality in exit_to_user_mode(). + * + * This is a combination of syscall_exit_to_user_mode_work() (1,2) and + * exit_to_user_mode(). This function is preferred unless there is a + * compelling architectural reason to use the seperate functions. */ void syscall_exit_to_user_mode(struct pt_regs *regs); -- cgit v1.2.3 From 6b6667aa4d1e0866f00b62d35a9be3875c7551f8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 24 Nov 2020 17:58:12 +0000 Subject: block: optimise for_each_bvec() advance Because of how for_each_bvec() works it never advances across multiple entries at a time, so bvec_iter_advance() is an overkill. Add specialised bvec_iter_advance_single() that is faster. It also handles zero-len bvecs, so can kill bvec_iter_skip_zero_bvec(). text data bss dec hex filename before: 23977 805 0 24782 60ce lib/iov_iter.o before, bvec_iter_advance() w/o WARN_ONCE() 22886 600 0 23486 5bbe ./lib/iov_iter.o after: 21862 600 0 22462 57be lib/iov_iter.o Signed-off-by: Pavel Begunkov Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 2efec10bf792..ff832e698efb 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -121,18 +121,28 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } -static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +/* + * A simpler version of bvec_iter_advance(), @bytes should not span + * across multiple bvec entries, i.e. bytes <= bv[i->bi_idx].bv_len + */ +static inline void bvec_iter_advance_single(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned int bytes) { - iter->bi_bvec_done = 0; - iter->bi_idx++; + unsigned int done = iter->bi_bvec_done + bytes; + + if (done == bv[iter->bi_idx].bv_len) { + done = 0; + iter->bi_idx++; + } + iter->bi_bvec_done = done; + iter->bi_size -= bytes; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ - (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ -- cgit v1.2.3 From 22b56c2964386ddced252be407150b22f85e209e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 24 Nov 2020 17:58:13 +0000 Subject: bio: optimise bvec iteration __bio_for_each_bvec(), __bio_for_each_segment() and bio_copy_data_iter() fall under conditions of bvec_iter_advance_single(), which is a faster and slimmer version of bvec_iter_advance(). Add bio_advance_iter_single() and convert them. Signed-off-by: Pavel Begunkov Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ecf67108f091..1edda614f7ce 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,11 +148,24 @@ static inline void bio_advance_iter(const struct bio *bio, /* TODO: It is reasonable to complete bio with error here. */ } +/* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ +static inline void bio_advance_iter_single(const struct bio *bio, + struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) @@ -161,7 +174,7 @@ static inline void bio_advance_iter(const struct bio *bio, for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ -- cgit v1.2.3 From 93b8959a0a8cf1b1a493efee9e8328681e111862 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 15:24:41 -0500 Subject: NFS: More readdir cleanups Remove the redundant caching of the credential in struct nfs_open_dir_context. Pass the buffer size as an argument to nfs_readdir_xdr_filler(). Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a2c6455ea3fa..dd6b463dda80 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -88,7 +88,6 @@ struct nfs_open_context { struct nfs_open_dir_context { struct list_head list; - const struct cred *cred; unsigned long attr_gencount; __u64 dir_cookie; __u64 dup_cookie; -- cgit v1.2.3 From 82e22a5e6245873779db1607d3b0fec6f9ca07d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 17:34:23 -0500 Subject: NFS: Allow the NFS generic code to pass in a verifier to readdir If we're ever going to allow support for servers that use the readdir verifier, then that use needs to be managed by the middle layers as those need to be able to reject cookies from other verifiers. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- include/linux/nfs_xdr.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d63cb862d58e..3327239fa2f9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -750,6 +750,20 @@ struct nfs_entry { struct nfs_server * server; }; +struct nfs_readdir_arg { + struct dentry *dentry; + const struct cred *cred; + __be32 *verf; + u64 cookie; + struct page **pages; + unsigned int page_len; + bool plus; +}; + +struct nfs_readdir_res { + __be32 *verf; +}; + /* * The following types are for NFSv2 only. */ @@ -1744,8 +1758,7 @@ struct nfs_rpc_ops { unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); - int (*readdir) (struct dentry *, const struct cred *, - u64, struct page **, unsigned int, bool); + int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *); int (*mknod) (struct inode *, struct dentry *, struct iattr *, dev_t); int (*statfs) (struct nfs_server *, struct nfs_fh *, -- cgit v1.2.3 From b593c09f83a2732a0f0298c8f3468236a83cdd9f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 20:06:12 -0500 Subject: NFS: Improve handling of directory verifiers If the server insists on using the readdir verifiers in order to allow cookies to expire, then we should ensure that we cache the verifier with the cookie, so that we can return an error if the application tries to use the expired cookie. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- include/linux/nfs_fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index dd6b463dda80..681ed98e4ba8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -45,6 +45,11 @@ */ #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) +/* + * Size of the NFS directory verifier + */ +#define NFS_DIR_VERIFIER_SIZE 2 + /* * NFSv3/v4 Access mode cache entry */ @@ -89,6 +94,7 @@ struct nfs_open_context { struct nfs_open_dir_context { struct list_head list; unsigned long attr_gencount; + __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; __u64 dup_cookie; signed char duped; @@ -156,7 +162,7 @@ struct nfs_inode { * This is the cookie verifier used for NFSv3 readdir * operations */ - __be32 cookieverf[2]; + __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; atomic_long_t nrequests; struct nfs_mds_commit_info commit_info; -- cgit v1.2.3 From d5aa6b22e2258f05317313ecc02efbb988ed6d38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Nov 2020 16:33:38 -0500 Subject: SUNRPC: xprt_load_transport() needs to support the netid "rdma6" According to RFC5666, the correct netid for an IPv6 addressed RDMA transport is "rdma6", which we've supported as a mount option since Linux-4.7. The problem is when we try to load the module "xprtrdma6", that will fail, since there is no modulealias of that name. Fixes: 181342c5ebe8 ("xprtrdma: Add rdma6 option to support NFS/RDMA IPv6") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a603d48d2b2c..3ac5037d1c3d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -330,6 +330,7 @@ struct xprt_class { struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; + const char * netid[]; }; /* -- cgit v1.2.3 From 1fc5f13186440973e1aa1d85aa263326756af431 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Nov 2020 09:41:21 -0500 Subject: SUNRPC: Add a helper to return the transport identifier given a netid Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3ac5037d1c3d..f7b75c72f80e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -386,6 +386,7 @@ xprt_disable_swap(struct rpc_xprt *xprt) int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); int xprt_load_transport(const char *); +int xprt_find_transport_ident(const char *); void xprt_wait_for_reply_request_def(struct rpc_task *task); void xprt_wait_for_reply_request_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -- cgit v1.2.3 From c87b056e58e71ba7a3f603700618f8da9742aa29 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Nov 2020 10:32:14 -0500 Subject: SUNRPC: Remove unused function xprt_load_transport() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index f7b75c72f80e..d2e97ee802af 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -385,7 +385,6 @@ xprt_disable_swap(struct rpc_xprt *xprt) */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); -int xprt_load_transport(const char *); int xprt_find_transport_ident(const char *); void xprt_wait_for_reply_request_def(struct rpc_task *task); void xprt_wait_for_reply_request_rtt(struct rpc_task *task); -- cgit v1.2.3 From 8a6a5920d3286eb0eae9f36a4ec4fc9df511eccb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:30 +0100 Subject: sched/vtime: Consolidate IRQ time accounting The 3 architectures implementing CONFIG_VIRT_CPU_ACCOUNTING_NATIVE all have their own version of irq time accounting that dispatch the cputime to the appropriate index: hardirq, softirq, system, idle, guest... from an all-in-one function. Instead of having these ad-hoc versions, move the cputime destination dispatch decision to the core code and leave only the actual per-index cputime accounting to the architecture. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201202115732.27827-4-frederic@kernel.org --- include/linux/vtime.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2cdeca062db3..6c9867419615 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -83,16 +83,12 @@ static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq_enter(struct task_struct *tsk); -static inline void vtime_account_irq_exit(struct task_struct *tsk) -{ - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_kernel(tsk); -} +extern void vtime_account_irq(struct task_struct *tsk); +extern void vtime_account_softirq(struct task_struct *tsk); +extern void vtime_account_hardirq(struct task_struct *tsk); extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq_enter(struct task_struct *tsk) { } -static inline void vtime_account_irq_exit(struct task_struct *tsk) { } +static inline void vtime_account_irq(struct task_struct *tsk) { } static inline void vtime_flush(struct task_struct *tsk) { } #endif @@ -105,13 +101,13 @@ static inline void irqtime_account_irq(struct task_struct *tsk) { } static inline void account_irq_enter_time(struct task_struct *tsk) { - vtime_account_irq_enter(tsk); + vtime_account_irq(tsk); irqtime_account_irq(tsk); } static inline void account_irq_exit_time(struct task_struct *tsk) { - vtime_account_irq_exit(tsk); + vtime_account_irq(tsk); irqtime_account_irq(tsk); } -- cgit v1.2.3 From d3759e7184f8f6187e62f8c4e7dcb1f6c47c075a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:31 +0100 Subject: irqtime: Move irqtime entry accounting after irq offset incrementation IRQ time entry is currently accounted before HARDIRQ_OFFSET or SOFTIRQ_OFFSET are incremented. This is convenient to decide to which index the cputime to account is dispatched. Unfortunately it prevents tick_irq_enter() from being called under HARDIRQ_OFFSET because tick_irq_enter() has to be called before the IRQ entry accounting due to the necessary clock catch up. As a result we don't benefit from appropriate lockdep coverage on tick_irq_enter(). To prepare for fixing this, move the IRQ entry cputime accounting after the preempt offset is incremented. This requires the cputime dispatch code to handle the extra offset. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201202115732.27827-5-frederic@kernel.org --- include/linux/hardirq.h | 4 ++-- include/linux/vtime.h | 34 ++++++++++++++++++++++++---------- 2 files changed, 26 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 754f67ac4326..7c9d6a2d7e90 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -32,9 +32,9 @@ static __always_inline void rcu_irq_enter_check_tick(void) */ #define __irq_enter() \ do { \ - account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ lockdep_hardirq_enter(); \ + account_hardirq_enter(current); \ } while (0) /* @@ -62,8 +62,8 @@ void irq_enter_rcu(void); */ #define __irq_exit() \ do { \ + account_hardirq_exit(current); \ lockdep_hardirq_exit(); \ - account_irq_exit_time(current); \ preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 6c9867419615..041d6524d144 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -83,32 +83,46 @@ static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq(struct task_struct *tsk); +extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset); extern void vtime_account_softirq(struct task_struct *tsk); extern void vtime_account_hardirq(struct task_struct *tsk); extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq(struct task_struct *tsk) { } +static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { } +static inline void vtime_account_softirq(struct task_struct *tsk) { } +static inline void vtime_account_hardirq(struct task_struct *tsk) { } static inline void vtime_flush(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING -extern void irqtime_account_irq(struct task_struct *tsk); +extern void irqtime_account_irq(struct task_struct *tsk, unsigned int offset); #else -static inline void irqtime_account_irq(struct task_struct *tsk) { } +static inline void irqtime_account_irq(struct task_struct *tsk, unsigned int offset) { } #endif -static inline void account_irq_enter_time(struct task_struct *tsk) +static inline void account_softirq_enter(struct task_struct *tsk) { - vtime_account_irq(tsk); - irqtime_account_irq(tsk); + vtime_account_irq(tsk, SOFTIRQ_OFFSET); + irqtime_account_irq(tsk, SOFTIRQ_OFFSET); } -static inline void account_irq_exit_time(struct task_struct *tsk) +static inline void account_softirq_exit(struct task_struct *tsk) { - vtime_account_irq(tsk); - irqtime_account_irq(tsk); + vtime_account_softirq(tsk); + irqtime_account_irq(tsk, 0); +} + +static inline void account_hardirq_enter(struct task_struct *tsk) +{ + vtime_account_irq(tsk, HARDIRQ_OFFSET); + irqtime_account_irq(tsk, HARDIRQ_OFFSET); +} + +static inline void account_hardirq_exit(struct task_struct *tsk) +{ + vtime_account_hardirq(tsk); + irqtime_account_irq(tsk, 0); } #endif /* _LINUX_KERNEL_VTIME_H */ -- cgit v1.2.3 From 278b13ce3a89698711c5a67792ba2dba41555433 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 2 Oct 2019 10:33:02 -0700 Subject: Input: remove input_polled_dev implementation Now that normal input devices support polling mode, and all users of input_polled_dev API have been converted, we can remove it. Signed-off-by: Dmitry Torokhov --- include/linux/input-polldev.h | 58 ------------------------------------------- 1 file changed, 58 deletions(-) delete mode 100644 include/linux/input-polldev.h (limited to 'include/linux') diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h deleted file mode 100644 index 14821fd231c0..000000000000 --- a/include/linux/input-polldev.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _INPUT_POLLDEV_H -#define _INPUT_POLLDEV_H - -/* - * Copyright (c) 2007 Dmitry Torokhov - */ - -#include -#include - -/** - * struct input_polled_dev - simple polled input device - * @private: private driver data. - * @open: driver-supplied method that prepares device for polling - * (enabled the device and maybe flushes device state). - * @close: driver-supplied method that is called when device is no - * longer being polled. Used to put device into low power mode. - * @poll: driver-supplied method that polls the device and posts - * input events (mandatory). - * @poll_interval: specifies how often the poll() method should be called. - * Defaults to 500 msec unless overridden when registering the device. - * @poll_interval_max: specifies upper bound for the poll interval. - * Defaults to the initial value of @poll_interval. - * @poll_interval_min: specifies lower bound for the poll interval. - * Defaults to 0. - * @input: input device structure associated with the polled device. - * Must be properly initialized by the driver (id, name, phys, bits). - * - * Polled input device provides a skeleton for supporting simple input - * devices that do not raise interrupts but have to be periodically - * scanned or polled to detect changes in their state. - */ -struct input_polled_dev { - void *private; - - void (*open)(struct input_polled_dev *dev); - void (*close)(struct input_polled_dev *dev); - void (*poll)(struct input_polled_dev *dev); - unsigned int poll_interval; /* msec */ - unsigned int poll_interval_max; /* msec */ - unsigned int poll_interval_min; /* msec */ - - struct input_dev *input; - -/* private: */ - struct delayed_work work; - - bool devres_managed; -}; - -struct input_polled_dev *input_allocate_polled_device(void); -struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev); -void input_free_polled_device(struct input_polled_dev *dev); -int input_register_polled_device(struct input_polled_dev *dev); -void input_unregister_polled_device(struct input_polled_dev *dev); - -#endif -- cgit v1.2.3 From 427167c0b064ed898b848209add62b4322ec7840 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 2 Dec 2020 09:25:15 -0800 Subject: bpf: Allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks I have to now lock/unlock socket for the bind hook execution. That shouldn't cause any overhead because the socket is unbound and shouldn't receive any traffic. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/20201202172516.3483656-3-sdf@google.com --- include/linux/bpf-cgroup.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ed71bd1a0825..72e69a0e1e8c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ sk->sk_prot->pre_connect) @@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -- cgit v1.2.3 From ec0caa974cd092549ab282deb8ec7ea73b36eba0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:37 -0800 Subject: fscrypt: introduce fscrypt_prepare_readdir() The last remaining use of fscrypt_get_encryption_info() from filesystems is for readdir (->iterate_shared()). Every other call is now in fs/crypto/ as part of some other higher-level operation. We need to add a new argument to fscrypt_get_encryption_info() to indicate whether the encryption policy is allowed to be unrecognized or not. Doing this is easier if we can work with high-level operations rather than direct filesystem use of fscrypt_get_encryption_info(). So add a function fscrypt_prepare_readdir() which wraps the call to fscrypt_get_encryption_info() for the readdir use case. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 0c9e64969b73..8cbb26f55695 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -242,6 +242,7 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned int flags); int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); +int __fscrypt_prepare_readdir(struct inode *dir); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, @@ -537,6 +538,11 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_readdir(struct inode *dir) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) @@ -795,6 +801,24 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, return 0; } +/** + * fscrypt_prepare_readdir() - prepare to read a possibly-encrypted directory + * @dir: the directory inode + * + * If the directory is encrypted and it doesn't already have its encryption key + * set up, try to set it up so that the filenames will be listed in plaintext + * form rather than in no-key form. + * + * Return: 0 on success; -errno on error. Note that the encryption key being + * unavailable is not considered an error. + */ +static inline int fscrypt_prepare_readdir(struct inode *dir) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_readdir(dir); + return 0; +} + /** * fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's * attributes -- cgit v1.2.3 From 7622350e5eda2cc57a72c6b27f1405d8b4f94670 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:38 -0800 Subject: fscrypt: move body of fscrypt_prepare_setattr() out-of-line In preparation for reducing the visibility of fscrypt_require_key() by moving it to fscrypt_private.h, move the call to it from fscrypt_prepare_setattr() to an out-of-line function. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8cbb26f55695..b20900bb829f 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -243,6 +243,7 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); int __fscrypt_prepare_readdir(struct inode *dir); +int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, @@ -543,6 +544,12 @@ static inline int __fscrypt_prepare_readdir(struct inode *dir) return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) @@ -840,8 +847,8 @@ static inline int fscrypt_prepare_readdir(struct inode *dir) static inline int fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { - if (attr->ia_valid & ATTR_SIZE) - return fscrypt_require_key(d_inode(dentry)); + if (IS_ENCRYPTED(d_inode(dentry))) + return __fscrypt_prepare_setattr(dentry, attr); return 0; } -- cgit v1.2.3 From de3cdc6e75179a2324c23400b21483a1372c95e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:39 -0800 Subject: fscrypt: move fscrypt_require_key() to fscrypt_private.h fscrypt_require_key() is now only used by files in fs/crypto/. So reduce its visibility to fscrypt_private.h. This is also a prerequsite for unexporting fscrypt_get_encryption_info(). Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b20900bb829f..a07610f27926 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -688,32 +688,6 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return fscrypt_get_info(inode) != NULL; } -/** - * fscrypt_require_key() - require an inode's encryption key - * @inode: the inode we need the key for - * - * If the inode is encrypted, set up its encryption key if not already done. - * Then require that the key be present and return -ENOKEY otherwise. - * - * No locks are needed, and the key will live as long as the struct inode --- so - * it won't go away from under you. - * - * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code - * if a problem occurred while setting up the encryption key. - */ -static inline int fscrypt_require_key(struct inode *inode) -{ - if (IS_ENCRYPTED(inode)) { - int err = fscrypt_get_encryption_info(inode); - - if (err) - return err; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - return 0; -} - /** * fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted * directory -- cgit v1.2.3 From 5b421f08801fe8247dec368b3d323958f419e769 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:40 -0800 Subject: fscrypt: unexport fscrypt_get_encryption_info() Now that fscrypt_get_encryption_info() is only called from files in fs/crypto/ (due to all key setup now being handled by higher-level helper functions instead of directly by filesystems), unexport it and move its declaration to fscrypt_private.h. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a07610f27926..4b163f5e58e9 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -75,7 +75,7 @@ struct fscrypt_operations { static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) { /* - * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info(). + * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). * I.e., another task may publish ->i_crypt_info concurrently, executing * a RELEASE barrier. We need to use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. @@ -200,7 +200,6 @@ int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); /* keysetup.c */ -int fscrypt_get_encryption_info(struct inode *inode); int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret); void fscrypt_put_encryption_info(struct inode *inode); @@ -408,10 +407,6 @@ static inline int fscrypt_ioctl_get_key_status(struct file *filp, } /* keysetup.c */ -static inline int fscrypt_get_encryption_info(struct inode *inode) -{ - return -EOPNOTSUPP; -} static inline int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, -- cgit v1.2.3 From a14d0b6764917b21ee6fdfd2a8a4c2920fbefcce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:41 -0800 Subject: fscrypt: allow deleting files with unsupported encryption policy Currently it's impossible to delete files that use an unsupported encryption policy, as the kernel will just return an error when performing any operation on the top-level encrypted directory, even just a path lookup into the directory or opening the directory for readdir. More specifically, this occurs in any of the following cases: - The encryption context has an unrecognized version number. Current kernels know about v1 and v2, but there could be more versions in the future. - The encryption context has unrecognized encryption modes (FSCRYPT_MODE_*) or flags (FSCRYPT_POLICY_FLAG_*), an unrecognized combination of modes, or reserved bits set. - The encryption key has been added and the encryption modes are recognized but aren't available in the crypto API -- for example, a directory is encrypted with FSCRYPT_MODE_ADIANTUM but the kernel doesn't have CONFIG_CRYPTO_ADIANTUM enabled. It's desirable to return errors for most operations on files that use an unsupported encryption policy, but the current behavior is too strict. We need to allow enough to delete files, so that people can't be stuck with undeletable files when downgrading kernel versions. That includes allowing directories to be listed and allowing dentries to be looked up. Fix this by modifying the key setup logic to treat an unsupported encryption policy in the same way as "key unavailable" in the cases that are required for a recursive delete to work: preparing for a readdir or a dentry lookup, revalidating a dentry, or checking whether an inode has the same encryption policy as its parent directory. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 4b163f5e58e9..d23156d1ac94 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -753,8 +753,9 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * * Prepare for ->lookup() in a directory which may be encrypted by determining * the name that will actually be used to search the directory on-disk. If the - * directory's encryption key is available, then the lookup is assumed to be by - * plaintext name; otherwise, it is assumed to be by no-key name. + * directory's encryption policy is supported by this kernel and its encryption + * key is available, then the lookup is assumed to be by plaintext name; + * otherwise, it is assumed to be by no-key name. * * This also installs a custom ->d_revalidate() method which will invalidate the * dentry if it was created without the key and the key is later added. @@ -786,7 +787,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, * form rather than in no-key form. * * Return: 0 on success; -errno on error. Note that the encryption key being - * unavailable is not considered an error. + * unavailable is not considered an error. It is also not an error if + * the encryption policy is unsupported by this kernel; that is treated + * like the key being unavailable, so that files can still be deleted. */ static inline int fscrypt_prepare_readdir(struct inode *dir) { -- cgit v1.2.3 From bcfe06bf2622f7c4899468e427683aec49070687 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:27 -0800 Subject: mm: memcontrol: Use helpers to read page's memcg data Patch series "mm: allow mapping accounted kernel pages to userspace", v6. Currently a non-slab kernel page which has been charged to a memory cgroup can't be mapped to userspace. The underlying reason is simple: PageKmemcg flag is defined as a page type (like buddy, offline, etc), so it takes a bit from a page->mapped counter. Pages with a type set can't be mapped to userspace. But in general the kmemcg flag has nothing to do with mapping to userspace. It only means that the page has been accounted by the page allocator, so it has to be properly uncharged on release. Some bpf maps are mapping the vmalloc-based memory to userspace, and their memory can't be accounted because of this implementation detail. This patchset removes this limitation by moving the PageKmemcg flag into one of the free bits of the page->mem_cgroup pointer. Also it formalizes accesses to the page->mem_cgroup and page->obj_cgroups using new helpers, adds several checks and removes a couple of obsolete functions. As the result the code became more robust with fewer open-coded bit tricks. This patch (of 4): Currently there are many open-coded reads of the page->mem_cgroup pointer, as well as a couple of read helpers, which are barely used. It creates an obstacle on a way to reuse some bits of the pointer for storing additional bits of information. In fact, we already do this for slab pages, where the last bit indicates that a pointer has an attached vector of objcg pointers instead of a regular memcg pointer. This commits uses 2 existing helpers and introduces a new helper to converts all read sides to calls of these helpers: struct mem_cgroup *page_memcg(struct page *page); struct mem_cgroup *page_memcg_rcu(struct page *page); struct mem_cgroup *page_memcg_check(struct page *page); page_memcg_check() is intended to be used in cases when the page can be a slab page and have a memcg pointer pointing at objcg vector. It does check the lowest bit, and if set, returns NULL. page_memcg() contains a VM_BUG_ON_PAGE() check for the page not being a slab page. To make sure nobody uses a direct access, struct page's mem_cgroup/obj_cgroups is converted to unsigned long memcg_data. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-1-guro@fb.com Link: https://lkml.kernel.org/r/20201027001657.3398190-2-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-2-guro@fb.com --- include/linux/memcontrol.h | 114 +++++++++++++++++++++++++++++++++++++++++---- include/linux/mm.h | 22 --------- include/linux/mm_types.h | 5 +- 3 files changed, 106 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..f95c1433461c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,79 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +/* + * page_memcg - get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages. + * + * Any of the following ensures page and memcg binding stability: + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference + */ +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + VM_BUG_ON_PAGE(PageSlab(page), page); + return (struct mem_cgroup *)page->memcg_data; +} + +/* + * page_memcg_rcu - locklessly get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages. + */ +static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +{ + VM_BUG_ON_PAGE(PageSlab(page), page); + WARN_ON_ONCE(!rcu_read_lock_held()); + + return (struct mem_cgroup *)READ_ONCE(page->memcg_data); +} + +/* + * page_memcg_check - get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function unlike page_memcg() can take any page + * as an argument. It has to be used in cases when it's not known if a page + * has an associated memory cgroup pointer or an object cgroups vector. + * + * Any of the following ensures page and memcg binding stability: + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference + */ +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + /* + * Because page->memcg_data might be changed asynchronously + * for slab pages, READ_ONCE() should be used here. + */ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + /* + * The lowest bit set means that memcg isn't a valid + * memcg pointer, but a obj_cgroups pointer. + * In this case the page is shared and doesn't belong + * to any specific memory cgroup. + */ + if (memcg_data & 0x1UL) + return NULL; + + return (struct mem_cgroup *)memcg_data; +} + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) @@ -743,15 +816,19 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, static inline void __mod_memcg_page_state(struct page *page, int idx, int val) { - if (page->mem_cgroup) - __mod_memcg_state(page->mem_cgroup, idx, val); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + __mod_memcg_state(memcg, idx, val); } static inline void mod_memcg_page_state(struct page *page, int idx, int val) { - if (page->mem_cgroup) - mod_memcg_state(page->mem_cgroup, idx, val); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + mod_memcg_state(memcg, idx, val); } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, @@ -834,16 +911,17 @@ static inline void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { struct page *head = compound_head(page); /* rmap on tail pages */ + struct mem_cgroup *memcg = page_memcg(head); pg_data_t *pgdat = page_pgdat(page); struct lruvec *lruvec; /* Untracked pages have no memcg, no lruvec. Update only the node */ - if (!head->mem_cgroup) { + if (!memcg) { __mod_node_page_state(pgdat, idx, val); return; } - lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); + lruvec = mem_cgroup_lruvec(memcg, pgdat); __mod_lruvec_state(lruvec, idx, val); } @@ -878,8 +956,10 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, static inline void count_memcg_page_event(struct page *page, enum vm_event_item idx) { - if (page->mem_cgroup) - count_memcg_events(page->mem_cgroup, idx, 1); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + count_memcg_events(memcg, idx, 1); } static inline void count_memcg_event_mm(struct mm_struct *mm, @@ -941,6 +1021,22 @@ void mem_cgroup_split_huge_fixup(struct page *head); struct mem_cgroup; +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return NULL; +} + +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + return NULL; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; @@ -1430,7 +1526,7 @@ static inline void mem_cgroup_track_foreign_dirty(struct page *page, if (mem_cgroup_disabled()) return; - if (unlikely(&page->mem_cgroup->css != wb->memcg_css)) + if (unlikely(&page_memcg(page)->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(page, wb); } diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..6b0c9d2c1d10 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1484,28 +1484,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } -#ifdef CONFIG_MEMCG -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return page->mem_cgroup; -} -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(page->mem_cgroup); -} -#else -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return NULL; -} -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return NULL; -} -#endif - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad..80f5d755c037 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -199,10 +199,7 @@ struct page { atomic_t _refcount; #ifdef CONFIG_MEMCG - union { - struct mem_cgroup *mem_cgroup; - struct obj_cgroup **obj_cgroups; - }; + unsigned long memcg_data; #endif /* -- cgit v1.2.3 From 270c6a71460e12b07b1dcadf7457ff95b6c6e8f4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:28 -0800 Subject: mm: memcontrol/slab: Use helpers to access slab page's memcg_data To gather all direct accesses to struct page's memcg_data field in one place, let's introduce 3 new helpers to use in the slab accounting code: struct obj_cgroup **page_objcgs(struct page *page); struct obj_cgroup **page_objcgs_check(struct page *page); bool set_page_objcgs(struct page *page, struct obj_cgroup **objcgs); They are similar to the corresponding API for generic pages, except that the setter can return false, indicating that the value has been already set from a different thread. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20201027001657.3398190-3-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-3-guro@fb.com --- include/linux/memcontrol.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f95c1433461c..c7ac0a5b8989 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -416,6 +416,70 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)memcg_data; } +#ifdef CONFIG_MEMCG_KMEM +/* + * page_objcgs - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function assumes that the page is known to have an + * associated object cgroups vector. It's not safe to call this function + * against pages, which might have an associated memory cgroup: e.g. + * kernel stack pages. + */ +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return (struct obj_cgroup **)(READ_ONCE(page->memcg_data) & ~0x1UL); +} + +/* + * page_objcgs_check - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function is safe to use if the page can be directly associated + * with a memory cgroup. + */ +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + if (memcg_data && (memcg_data & 0x1UL)) + return (struct obj_cgroup **)(memcg_data & ~0x1UL); + + return NULL; +} + +/* + * set_page_objcgs - associate a page with a object cgroups vector + * @page: a pointer to the page struct + * @objcgs: a pointer to the object cgroups vector + * + * Atomically associates a page with a vector of object cgroups. + */ +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL); +} +#else +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return NULL; +} + +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + return NULL; +} + +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return true; +} +#endif + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) -- cgit v1.2.3 From 87944e2992bd28098c6806086a1e96bb4d0e502b Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:29 -0800 Subject: mm: Introduce page memcg flags The lowest bit in page->memcg_data is used to distinguish between struct memory_cgroup pointer and a pointer to a objcgs array. All checks and modifications of this bit are open-coded. Let's formalize it using page memcg flags, defined in enum page_memcg_data_flags. Additional flags might be added later. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-4-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-4-guro@fb.com --- include/linux/memcontrol.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c7ac0a5b8989..99a4841d658b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,15 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +enum page_memcg_data_flags { + /* page->memcg_data is a pointer to an objcgs vector */ + MEMCG_DATA_OBJCGS = (1UL << 0), + /* the next bit after the last actual flag */ + __NR_MEMCG_DATA_FLAGS = (1UL << 1), +}; + +#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) + /* * page_memcg - get the memory cgroup associated with a page * @page: a pointer to the page struct @@ -404,13 +413,7 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) */ unsigned long memcg_data = READ_ONCE(page->memcg_data); - /* - * The lowest bit set means that memcg isn't a valid - * memcg pointer, but a obj_cgroups pointer. - * In this case the page is shared and doesn't belong - * to any specific memory cgroup. - */ - if (memcg_data & 0x1UL) + if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; return (struct mem_cgroup *)memcg_data; @@ -429,7 +432,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) */ static inline struct obj_cgroup **page_objcgs(struct page *page) { - return (struct obj_cgroup **)(READ_ONCE(page->memcg_data) & ~0x1UL); + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); + + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -444,10 +451,10 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) { unsigned long memcg_data = READ_ONCE(page->memcg_data); - if (memcg_data && (memcg_data & 0x1UL)) - return (struct obj_cgroup **)(memcg_data & ~0x1UL); + if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) + return NULL; - return NULL; + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -460,7 +467,8 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) static inline bool set_page_objcgs(struct page *page, struct obj_cgroup **objcgs) { - return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL); + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | + MEMCG_DATA_OBJCGS); } #else static inline struct obj_cgroup **page_objcgs(struct page *page) -- cgit v1.2.3 From 18b2db3b0385226b71cb3288474fa5a6e4a45474 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:30 -0800 Subject: mm: Convert page kmemcg type to a page memcg flag PageKmemcg flag is currently defined as a page type (like buddy, offline, table and guard). Semantically it means that the page was accounted as a kernel memory by the page allocator and has to be uncharged on the release. As a side effect of defining the flag as a page type, the accounted page can't be mapped to userspace (look at page_has_type() and comments above). In particular, this blocks the accounting of vmalloc-backed memory used by some bpf maps, because these maps do map the memory to userspace. One option is to fix it by complicating the access to page->mapcount, which provides some free bits for page->page_type. But it's way better to move this flag into page->memcg_data flags. Indeed, the flag makes no sense without enabled memory cgroups and memory cgroup pointer set in particular. This commit replaces PageKmemcg() and __SetPageKmemcg() with PageMemcgKmem() and an open-coded OR operation setting the memcg pointer with the MEMCG_DATA_KMEM bit. __ClearPageKmemcg() can be simple deleted, as the whole memcg_data is zeroed at once. As a bonus, on !CONFIG_MEMCG build the PageMemcgKmem() check will be compiled out. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-5-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-5-guro@fb.com --- include/linux/memcontrol.h | 37 +++++++++++++++++++++++++++++++++---- include/linux/page-flags.h | 11 ++--------- 2 files changed, 35 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 99a4841d658b..7c9d43476166 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -346,8 +346,10 @@ extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { /* page->memcg_data is a pointer to an objcgs vector */ MEMCG_DATA_OBJCGS = (1UL << 0), + /* page has been accounted as a non-slab kernel page */ + MEMCG_DATA_KMEM = (1UL << 1), /* the next bit after the last actual flag */ - __NR_MEMCG_DATA_FLAGS = (1UL << 1), + __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) @@ -369,8 +371,12 @@ enum page_memcg_data_flags { */ static inline struct mem_cgroup *page_memcg(struct page *page) { + unsigned long memcg_data = page->memcg_data; + VM_BUG_ON_PAGE(PageSlab(page), page); - return (struct mem_cgroup *)page->memcg_data; + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -387,7 +393,8 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) VM_BUG_ON_PAGE(PageSlab(page), page); WARN_ON_ONCE(!rcu_read_lock_held()); - return (struct mem_cgroup *)READ_ONCE(page->memcg_data); + return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & + ~MEMCG_DATA_FLAGS_MASK); } /* @@ -416,7 +423,21 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; - return (struct mem_cgroup *)memcg_data; + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * PageMemcgKmem - check if the page has MemcgKmem flag set + * @page: a pointer to the page struct + * + * Checks if the page has MemcgKmem flag set. The caller must ensure that + * the page has an associated memory cgroup. It's not safe to call this function + * against some types of pages, e.g. slab pages. + */ +static inline bool PageMemcgKmem(struct page *page) +{ + VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); + return page->memcg_data & MEMCG_DATA_KMEM; } #ifdef CONFIG_MEMCG_KMEM @@ -435,6 +456,7 @@ static inline struct obj_cgroup **page_objcgs(struct page *page) unsigned long memcg_data = READ_ONCE(page->memcg_data); VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -454,6 +476,8 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) return NULL; + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -1109,6 +1133,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline bool PageMemcgKmem(struct page *page) +{ + return false; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4f6ba9379112..fc0e1bd48e73 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -715,9 +715,8 @@ PAGEFLAG_FALSE(DoubleMap) #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 -#define PG_kmemcg 0x00000200 -#define PG_table 0x00000400 -#define PG_guard 0x00000800 +#define PG_table 0x00000200 +#define PG_guard 0x00000400 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -768,12 +767,6 @@ PAGE_TYPE_OPS(Buddy, buddy) */ PAGE_TYPE_OPS(Offline, offline) -/* - * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on - * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. - */ -PAGE_TYPE_OPS(Kmemcg, kmemcg) - /* * Marks pages in use as page tables. */ -- cgit v1.2.3 From 48edc1f78aabeba35ed00e40c36f211de89e0090 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:32 -0800 Subject: bpf: Prepare for memcg-based memory accounting for bpf maps Bpf maps can be updated from an interrupt context and in such case there is no process which can be charged. It makes the memory accounting of bpf maps non-trivial. Fortunately, after commit 4127c6504f25 ("mm: kmem: enable kernel memcg accounting from interrupt contexts") and commit b87d8cefe43c ("mm, memcg: rework remote charging API to support nesting") it's finally possible. To make the ownership model simple and consistent, when the map is created, the memory cgroup of the current process is recorded. All subsequent allocations related to the bpf map are charged to the same memory cgroup. It includes allocations made by any processes (even if they do belong to a different cgroup) and from interrupts. This commit introduces 3 new helpers, which will be used by following commits to enable the accounting of bpf maps memory: - bpf_map_kmalloc_node() - bpf_map_kzalloc() - bpf_map_alloc_percpu() They are wrapping popular memory allocation functions. They set the active memory cgroup to the map's memory cgroup and add __GFP_ACCOUNT to the passed gfp flags. Then they call into the corresponding memory allocation function and restore the original active memory cgroup. These helpers are supposed to use everywhere except the map creation path. During the map creation when the map structure is allocated by itself, it cannot be passed to those helpers. In those cases default memory allocation function will be used with the __GFP_ACCOUNT flag. Signed-off-by: Roman Gushchin Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201201215900.3569844-7-guro@fb.com --- include/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1bcb6d7345c..e1f2c95c15ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -37,6 +39,7 @@ struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; +struct mem_cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -161,6 +164,9 @@ struct bpf_map { u32 btf_value_type_id; struct btf *btf; struct bpf_map_memory memory; +#ifdef CONFIG_MEMCG_KMEM + struct mem_cgroup *memcg; +#endif char name[BPF_OBJ_NAME_LEN]; u32 btf_vmlinux_value_type_id; bool bypass_spec_v1; @@ -1240,6 +1246,34 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +#ifdef CONFIG_MEMCG_KMEM +void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node); +void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); +void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, + size_t align, gfp_t flags); +#else +static inline void * +bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + return kmalloc_node(size, flags, node); +} + +static inline void * +bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) +{ + return kzalloc(size, flags); +} + +static inline void __percpu * +bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, + gfp_t flags) +{ + return __alloc_percpu_gfp(size, align, flags); +} +#endif + extern int sysctl_unprivileged_bpf_disabled; static inline bool bpf_allow_ptr_leaks(void) -- cgit v1.2.3 From 80ee81e0403c48f4eb342f7c8d40477c89b8836a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:58 -0800 Subject: bpf: Eliminate rlimit-based memory accounting infra for bpf maps Remove rlimit-based accounting infrastructure code, which is not used anymore. To provide a backward compatibility, use an approximation of the bpf map memory footprint as a "memlock" value, available to a user via map info. The approximation is based on the maximal number of elements and key and value sizes. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-33-guro@fb.com --- include/linux/bpf.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1f2c95c15ec..61331a148cde 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -138,11 +138,6 @@ struct bpf_map_ops { const struct bpf_iter_seq_info *iter_seq_info; }; -struct bpf_map_memory { - u32 pages; - struct user_struct *user; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -163,7 +158,6 @@ struct bpf_map { u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; - struct bpf_map_memory memory; #ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -1224,12 +1218,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); -int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); -void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); -void bpf_map_charge_finish(struct bpf_map_memory *mem); -void bpf_map_charge_move(struct bpf_map_memory *dst, - struct bpf_map_memory *src); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); -- cgit v1.2.3 From 3ac1f01b43b6e2759cc34d3a715ba5eed04c5805 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:59 -0800 Subject: bpf: Eliminate rlimit-based memory accounting for bpf progs Do not use rlimit-based memory accounting for bpf progs. It has been replaced with memcg-based memory accounting. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-34-guro@fb.com --- include/linux/bpf.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 61331a148cde..a9de5711b23f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1202,8 +1202,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -int __bpf_prog_charge(struct user_struct *user, u32 pages); -void __bpf_prog_uncharge(struct user_struct *user, u32 pages); void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len); @@ -1512,15 +1510,6 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog) return ERR_PTR(-EOPNOTSUPP); } -static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) -{ - return 0; -} - -static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) -{ -} - static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) -- cgit v1.2.3 From 608af703519a58f5a7da4273809211cac27edfb2 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:02 +0000 Subject: libfs: Add generic function for setting dentry_ops This adds a function to set dentry operations at lookup time that will work for both encrypted filenames and casefolded filenames. A filesystem that supports both features simultaneously can use this function during lookup preparations to set up its dentry operations once fscrypt no longer does that itself. Currently the casefolding dentry operation are always set if the filesystem defines an encoding because the features is toggleable on empty directories. Unlike in the encryption case, the dentry operations used come from the parent. Since we don't know what set of functions we'll eventually need, and cannot change them later, we enable the casefolding operations if the filesystem supports them at all. By splitting out the various cases, we support as few dentry operations as we can get away with, maximizing compatibility with overlayfs, which will not function if a filesystem supports certain dentry_operations. Signed-off-by: Daniel Rosenberg Reviewed-by: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jaegeuk Kim --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 21cc971fd960..4a25ab4dbd3e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3186,6 +3186,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #endif +extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, -- cgit v1.2.3 From bb9cd9106b22b4fc5ff8d78a752be8a4ba2cbba5 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:03 +0000 Subject: fscrypt: Have filesystems handle their d_ops This shifts the responsibility of setting up dentry operations from fscrypt to the individual filesystems, allowing them to have their own operations while still setting fscrypt's d_revalidate as appropriate. Most filesystems can just use generic_set_encrypted_ci_d_ops, unless they have their own specific dentry operations as well. That operation will set the minimal d_ops required under the circumstances. Since the fscrypt d_ops are set later on, we must set all d_ops there, since we cannot adjust those later on. This should not result in any change in behavior. Signed-off-by: Daniel Rosenberg Acked-by: Theodore Ts'o Acked-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- include/linux/fscrypt.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a8f7a43f031b..e72f80482671 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -741,8 +741,11 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * directory's encryption key is available, then the lookup is assumed to be by * plaintext name; otherwise, it is assumed to be by no-key name. * - * This also installs a custom ->d_revalidate() method which will invalidate the - * dentry if it was created without the key and the key is later added. + * This will set DCACHE_NOKEY_NAME on the dentry if the lookup is by no-key + * name. In this case the filesystem must assign the dentry a dentry_operations + * which contains fscrypt_d_revalidate (or contains a d_revalidate method that + * calls fscrypt_d_revalidate), so that the dentry will be invalidated if the + * directory's encryption key is later added. * * Return: 0 on success; -ENOENT if the directory's key is unavailable but the * filename isn't a valid no-key name, so a negative dentry should be created; -- cgit v1.2.3 From b28f047b28c51d0b9864c34b097bb0b221ea7247 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Nov 2020 18:32:09 +0800 Subject: f2fs: compress: support chksum This patch supports to store chksum value with compressed data, and verify the integrality of compressed data while reading the data. The feature can be enabled through specifying mount option 'compress_chksum'. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5dbb57a687f..7dc2a06cf19a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -273,7 +273,7 @@ struct f2fs_inode { __le64 i_compr_blocks; /* # of compressed blocks */ __u8 i_compress_algorithm; /* compress algorithm */ __u8 i_log_cluster_size; /* log of cluster size */ - __le16 i_padding; /* padding */ + __le16 i_compress_flag; /* compress flag */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ -- cgit v1.2.3 From 39be39ceffd572baddfeff8b50aba931d3d6d785 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Sun, 4 Oct 2020 21:15:46 -0700 Subject: Input: add input_device_enabled() A helper function for drivers to decide if the device is used or not. A lockdep check is introduced as inspecting ->users should be done under input device's mutex. Signed-off-by: Andrzej Pietrasiewicz Link: https://lore.kernel.org/r/20200608112211.12125-2-andrzej.p@collabora.com Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 56f2fd32e609..eda4587dba67 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -502,6 +502,8 @@ bool input_match_device_id(const struct input_dev *dev, void input_enable_softrepeat(struct input_dev *dev, int delay, int period); +bool input_device_enabled(struct input_dev *dev); + extern struct class input_class; /** -- cgit v1.2.3 From a181616487dbdbc953e476d1da15365f887859ed Mon Sep 17 00:00:00 2001 From: Patrik Fimml Date: Wed, 2 Dec 2020 14:42:04 -0800 Subject: Input: Add "inhibited" property Userspace might want to implement a policy to temporarily disregard input from certain devices, including not treating them as wakeup sources. An example use case is a laptop, whose keyboard can be folded under the screen to create tablet-like experience. The user then must hold the laptop in such a way that it is difficult to avoid pressing the keyboard keys. It is therefore desirable to temporarily disregard input from the keyboard, until it is folded back. This obviously is a policy which should be kept out of the kernel, but the kernel must provide suitable means to implement such a policy. This patch adds a sysfs interface for exactly this purpose. To implement the said interface it adds an "inhibited" property to struct input_dev, and effectively creates four states a device can be in: closed uninhibited, closed inhibited, open uninhibited, open inhibited. It also defers calling driver's ->open() and ->close() to until they are actually needed, e.g. it makes no sense to prepare the underlying device for generating events (->open()) if the device is inhibited. uninhibit closed <------------ closed uninhibited ------------> inhibited | ^ inhibit | ^ 1st | | 1st | | open | | open | | | | | | | | last | | last | | close | | close v | uninhibit v | open <------------ open uninhibited ------------> inhibited The top inhibit/uninhibit transition happens when users == 0. The bottom inhibit/uninhibit transition happens when users > 0. The left open/close transition happens when !inhibited. The right open/close transition happens when inhibited. Due to all transitions being serialized with dev->mutex, it is impossible to have "diagonal" transitions between closed uninhibited and open inhibited or between open uninhibited and closed inhibited. No new callbacks are added to drivers, because their open() and close() serve exactly the purpose to tell the driver to start/stop providing events to the input core. Consequently, open() and close() - if provided - are called in both inhibit and uninhibit paths. Signed-off-by: Patrik Fimml Co-developed-by: Andrzej Pietrasiewicz Signed-off-by: Andrzej Pietrasiewicz Link: https://lore.kernel.org/r/20200608112211.12125-8-andrzej.p@collabora.com Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index eda4587dba67..0354b298d874 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -90,9 +90,11 @@ enum input_clock_type { * @open: this method is called when the very first user calls * input_open_device(). The driver must prepare the device * to start generating events (start polling thread, - * request an IRQ, submit URB, etc.) + * request an IRQ, submit URB, etc.). The meaning of open() is + * to start providing events to the input core. * @close: this method is called when the very last user calls - * input_close_device(). + * input_close_device(). The meaning of close() is to stop + * providing events to the input core. * @flush: purges the device. Most commonly used to get rid of force * feedback effects loaded into the device when disconnecting * from it @@ -127,6 +129,10 @@ enum input_clock_type { * and needs not be explicitly unregistered or freed. * @timestamp: storage for a timestamp set by input_set_timestamp called * by a driver + * @inhibited: indicates that the input device is inhibited. If that is + * the case then input core ignores any events generated by the device. + * Device's close() is called when it is being inhibited and its open() + * is called when it is being uninhibited. */ struct input_dev { const char *name; @@ -201,6 +207,8 @@ struct input_dev { bool devres_managed; ktime_t timestamp[INPUT_CLK_MAX]; + + bool inhibited; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) -- cgit v1.2.3 From 41a340941854c4606a9b71b9d68db412747e7c84 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 2 Dec 2020 15:13:26 +0100 Subject: media: coda: Convert the driver to DT-only Since 5.10-rc1 i.MX is a devicetree-only platform, so simplify the code by removing the unused non-DT support. Signed-off-by: Fabio Estevam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/coda.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/platform_data/media/coda.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/coda.h b/include/linux/platform_data/media/coda.h deleted file mode 100644 index 293b61b60c9d..000000000000 --- a/include/linux/platform_data/media/coda.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2013 Philipp Zabel, Pengutronix - */ -#ifndef PLATFORM_CODA_H -#define PLATFORM_CODA_H - -struct device; - -struct coda_platform_data { - struct device *iram_dev; -}; - -#endif -- cgit v1.2.3 From 2a4a06da8a4b93dd189171eed7a99fffd38f42f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Nov 2020 11:41:40 +0100 Subject: mm/gup: Provide gup_get_pte() more generic In order to write another lockless page-table walker, we need gup_get_pte() exposed. While doing that, rename it to ptep_get_lockless() to match the existing ptep_get() naming. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201126121121.036370527@infradead.org --- include/linux/pgtable.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..ed9266cc115b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -258,6 +258,61 @@ static inline pte_t ptep_get(pte_t *ptep) } #endif +#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +/* + * WARNING: only to be used in the get_user_pages_fast() implementation. + * + * With get_user_pages_fast(), we walk down the pagetables without taking any + * locks. For this we would like to load the pointers atomically, but sometimes + * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What + * we do have is the guarantee that a PTE will only either go from not present + * to present, or present to not present or both -- it will not switch to a + * completely different present page without a TLB flush in between; something + * that we are blocking by holding interrupts off. + * + * Setting ptes from not present to present goes: + * + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. + * We load pte_high *after* loading pte_low, which ensures we don't see an older + * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't + * picked up a changed pte high. We might have gotten rubbish values from + * pte_low and pte_high, but we are guaranteed that pte_low will not have the + * present bit set *unless* it is 'l'. Because get_user_pages_fast() only + * operates on present ptes we're safe. + */ +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + pte_t pte; + + do { + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + } while (unlikely(pte.pte_low != ptep->pte_low)); + + return pte; +} +#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +/* + * We require that the PTE can be read atomically. + */ +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + return ptep_get(ptep); +} +#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, -- cgit v1.2.3 From 560dabbdf68bb15f9e241af8f828b1c8c38d6c6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Nov 2020 11:45:36 +0100 Subject: mm: Introduce pXX_leaf_size() A number of architectures have non-pagetable aligned huge/large pages. For such architectures a leaf can actually be part of a larger entry. Provide generic helpers to determine the size of a page-table leaf. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matthew Wilcox (Oracle) Link: https://lkml.kernel.org/r/20201126121121.102580109@infradead.org --- include/linux/pgtable.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ed9266cc115b..fefbbdbbb3f3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1536,4 +1536,20 @@ typedef unsigned int pgtbl_mod_mask; #define pmd_leaf(x) 0 #endif +#ifndef pgd_leaf_size +#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT) +#endif +#ifndef p4d_leaf_size +#define p4d_leaf_size(x) P4D_SIZE +#endif +#ifndef pud_leaf_size +#define pud_leaf_size(x) PUD_SIZE +#endif +#ifndef pmd_leaf_size +#define pmd_leaf_size(x) PMD_SIZE +#endif +#ifndef pte_leaf_size +#define pte_leaf_size(x) PAGE_SIZE +#endif + #endif /* _LINUX_PGTABLE_H */ -- cgit v1.2.3 From a07c45312f06e288417049208c344ad76074627d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:50:38 +0100 Subject: seqlock: avoid -Wshadow warnings When building with W=2, there is a flood of warnings about the seqlock macros shadowing local variables: 19806 linux/seqlock.h:331:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] 48 linux/seqlock.h:348:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] 8 linux/seqlock.h:379:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] Prefix the local variables to make the warning useful elsewhere again. Fixes: 52ac39e5db51 ("seqlock: seqcount_t: Implement all read APIs as statement expressions") Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201026165044.3722931-1-arnd@kernel.org --- include/linux/seqlock.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index cbfc78b92b65..8d8552474c64 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -328,13 +328,13 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define __read_seqcount_begin(s) \ ({ \ - unsigned seq; \ + unsigned __seq; \ \ - while ((seq = __seqcount_sequence(s)) & 1) \ + while ((__seq = __seqcount_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ - seq; \ + __seq; \ }) /** @@ -345,10 +345,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount_begin(s) \ ({ \ - unsigned seq = __read_seqcount_begin(s); \ + unsigned _seq = __read_seqcount_begin(s); \ \ smp_rmb(); \ - seq; \ + _seq; \ }) /** @@ -376,11 +376,11 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount(s) \ ({ \ - unsigned seq = __seqcount_sequence(s); \ + unsigned __seq = __seqcount_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ - seq; \ + __seq; \ }) /** -- cgit v1.2.3 From ab440b2c604b60fe90885270fcfeb5c3dd5d6fae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Nov 2020 13:44:17 +0100 Subject: seqlock: Rename __seqprop() users More consistent naming should make it easier to untangle the _Generic token pasting maze called __seqprop(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201110115358.GE2594@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8d8552474c64..d89134c74fba 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -307,10 +307,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __seqprop_case((s), mutex, prop), \ __seqprop_case((s), ww_mutex, prop)) -#define __seqcount_ptr(s) __seqprop(s, ptr) -#define __seqcount_sequence(s) __seqprop(s, sequence) -#define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) -#define __seqcount_assert_lock_held(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr) +#define seqprop_sequence(s) __seqprop(s, sequence) +#define seqprop_preemptible(s) __seqprop(s, preemptible) +#define seqprop_assert(s) __seqprop(s, assert) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -330,7 +330,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu ({ \ unsigned __seq; \ \ - while ((__seq = __seqcount_sequence(s)) & 1) \ + while ((__seq = seqprop_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -359,7 +359,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(__seqcount_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -376,7 +376,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount(s) \ ({ \ - unsigned __seq = __seqcount_sequence(s); \ + unsigned __seq = seqprop_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -425,7 +425,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - __read_seqcount_t_retry(__seqcount_ptr(s), start) + __read_seqcount_t_retry(seqprop_ptr(s), start) static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -445,7 +445,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - read_seqcount_t_retry(__seqcount_ptr(s), start) + read_seqcount_t_retry(seqprop_ptr(s), start) static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -459,10 +459,10 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) */ #define raw_write_seqcount_begin(s) \ do { \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - raw_write_seqcount_t_begin(__seqcount_ptr(s)); \ + raw_write_seqcount_t_begin(seqprop_ptr(s)); \ } while (0) static inline void raw_write_seqcount_t_begin(seqcount_t *s) @@ -478,9 +478,9 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) */ #define raw_write_seqcount_end(s) \ do { \ - raw_write_seqcount_t_end(__seqcount_ptr(s)); \ + raw_write_seqcount_t_end(seqprop_ptr(s)); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -501,12 +501,12 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s) */ #define write_seqcount_begin_nested(s, subclass) \ do { \ - __seqcount_assert_lock_held(s); \ + seqprop_assert(s); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \ + write_seqcount_t_begin_nested(seqprop_ptr(s), subclass); \ } while (0) static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) @@ -528,12 +528,12 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) */ #define write_seqcount_begin(s) \ do { \ - __seqcount_assert_lock_held(s); \ + seqprop_assert(s); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin(__seqcount_ptr(s)); \ + write_seqcount_t_begin(seqprop_ptr(s)); \ } while (0) static inline void write_seqcount_t_begin(seqcount_t *s) @@ -549,9 +549,9 @@ static inline void write_seqcount_t_begin(seqcount_t *s) */ #define write_seqcount_end(s) \ do { \ - write_seqcount_t_end(__seqcount_ptr(s)); \ + write_seqcount_t_end(seqprop_ptr(s)); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -603,7 +603,7 @@ static inline void write_seqcount_t_end(seqcount_t *s) * } */ #define raw_write_seqcount_barrier(s) \ - raw_write_seqcount_t_barrier(__seqcount_ptr(s)) + raw_write_seqcount_t_barrier(seqprop_ptr(s)) static inline void raw_write_seqcount_t_barrier(seqcount_t *s) { @@ -623,7 +623,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ - write_seqcount_t_invalidate(__seqcount_ptr(s)) + write_seqcount_t_invalidate(seqprop_ptr(s)) static inline void write_seqcount_t_invalidate(seqcount_t *s) { -- cgit v1.2.3 From b6498aad59b091e5618a9f05e7636e2ad2c6732d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:09:00 +0100 Subject: completion: Drop init_completion define Changeset cd8084f91c02 ("locking/lockdep: Apply crossrelease to completions") added a CONFIG_LOCKDEP_COMPLETE (that was later renamed to CONFIG_LOCKDEP_COMPLETIONS). Such changeset renamed the init_completion, and add a macro that would either run a modified version or the original code. However, such code reported too many false positives. So, it ended being dropped later on by changeset e966eaeeb623 ("locking/lockdep: Remove the cross-release locking checks"). Yet, the define remained there as just: #define init_completion(x) __init_completion(x) Get rid of the define, and return __init_completion() function to its original name. Fixes: e966eaeeb623 ("locking/lockdep: Remove the cross-release locking checks") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/e657bfc533545c185b1c3c55926a449ead56a88b.1606823973.git.mchehab+huawei@kernel.org --- include/linux/completion.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index bf8e77001f18..51d9ab079629 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -28,8 +28,7 @@ struct completion { struct swait_queue_head wait; }; -#define init_completion_map(x, m) __init_completion(x) -#define init_completion(x) __init_completion(x) +#define init_completion_map(x, m) init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} @@ -82,7 +81,7 @@ static inline void complete_release(struct completion *x) {} * This inline function will initialize a dynamically created completion * structure. */ -static inline void __init_completion(struct completion *x) +static inline void init_completion(struct completion *x) { x->done = 0; init_swait_queue_head(&x->wait); -- cgit v1.2.3 From 97d62caa32d6d79dadae3f8d19af5c92ea9a589a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:09:08 +0100 Subject: refcount: Fix a kernel-doc markup The kernel-doc markup is wrong: it is asking the tool to document struct refcount_struct, instead of documenting typedef refcount_t. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Link: https://lkml.kernel.org/r/afb9bb1e675bf5f72a34a55d780779d7d5916b4c.1606823973.git.mchehab+huawei@kernel.org --- include/linux/refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 497990c69b0b..b8a6e387f8f9 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -101,7 +101,7 @@ struct mutex; /** - * struct refcount_t - variant of atomic_t specialized for reference counts + * typedef refcount_t - variant of atomic_t specialized for reference counts * @refs: atomic_t counter field * * The counter saturates at REFCOUNT_SATURATED and will not move once -- cgit v1.2.3 From 950cc0d2bef078e1f6459900ca4d4b2a2e0e3c37 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 2 Dec 2020 14:07:07 +0200 Subject: fsnotify: generalize handle_inode_event() The handle_inode_event() interface was added as (quoting comment): "a simple variant of handle_event() for groups that only have inode marks and don't have ignore mask". In other words, all backends except fanotify. The inotify backend also falls under this category, but because it required extra arguments it was left out of the initial pass of backends conversion to the simple interface. This results in code duplication between the generic helper fsnotify_handle_event() and the inotify_handle_event() callback which also happen to be buggy code. Generalize the handle_inode_event() arguments and add the check for FS_EXCL_UNLINK flag to the generic helper, so inotify backend could be converted to use the simple interface. Link: https://lore.kernel.org/r/20201202120713.702387-2-amir73il@gmail.com CC: stable@vger.kernel.org Fixes: b9a1b9772509 ("fsnotify: create method handle_inode_event() in fsnotify_operations") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index f8529a3a2923..4ee3044eedd0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -137,6 +137,7 @@ struct mem_cgroup; * if @file_name is not NULL, this is the directory that * @file_name is relative to. * @file_name: optional file name associated with event + * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group @@ -151,7 +152,7 @@ struct fsnotify_ops { struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *file_name); + const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); -- cgit v1.2.3 From 50a4952fd67b7f7f551e82ac07c51c1a7a74d474 Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Thu, 15 Oct 2020 15:24:52 +0200 Subject: Updated locking documentation for transaction_t We used LockDoc to derive locking rules for each member of struct transaction_t. Based on those results, we extended the existing documentation by more members of struct transaction_t, and updated the existing documentation. Link: https://lore.kernel.org/r/10cfbef1-994c-c604-f8a6-b1042fcc622f@tu-dortmund.de Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 578ff196b3ce..d2a4860feb72 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -538,6 +538,7 @@ struct transaction_chp_stats_s { * The transaction keeps track of all of the buffers modified by a * running transaction, and all of the buffers committed but not yet * flushed to home for finished transactions. + * (Locking Documentation improved by LockDoc) */ /* @@ -658,12 +659,12 @@ struct transaction_s unsigned long t_start; /* - * When commit was requested + * When commit was requested [j_state_lock] */ unsigned long t_requested; /* - * Checkpointing stats [j_checkpoint_sem] + * Checkpointing stats [j_list_lock] */ struct transaction_chp_stats_s t_chp_stats; -- cgit v1.2.3 From 14026b94ccfe626e512bc9fa01e0e72ee75c7a98 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:32 +0000 Subject: signal: Add unsafe_put_compat_sigset() Implement 'unsafe' version of put_compat_sigset() For the bigendian, use unsafe_put_user() directly to avoid intermediate copy through the stack. For the littleendian, use a straight unsafe_copy_to_user(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/537c7082ee309a0bb9c67a50c5d9dd929aedb82d.1597770847.git.christophe.leroy@csgroup.eu --- include/linux/compat.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 14d514233e1d..400c0941c8af 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -442,6 +442,38 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, #endif } +#ifdef CONFIG_CPU_BIG_ENDIAN +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_put_user(__s->sig[3] >> 32, &__c->sig[7], label); \ + unsafe_put_user(__s->sig[3], &__c->sig[6], label); \ + fallthrough; \ + case 3: \ + unsafe_put_user(__s->sig[2] >> 32, &__c->sig[5], label); \ + unsafe_put_user(__s->sig[2], &__c->sig[4], label); \ + fallthrough; \ + case 2: \ + unsafe_put_user(__s->sig[1] >> 32, &__c->sig[3], label); \ + unsafe_put_user(__s->sig[1], &__c->sig[2], label); \ + fallthrough; \ + case 1: \ + unsafe_put_user(__s->sig[0] >> 32, &__c->sig[1], label); \ + unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ + } \ +} while (0) +#else +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ +} while (0) +#endif + extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -- cgit v1.2.3 From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001 From: Thomas Karlsson Date: Wed, 2 Dec 2020 19:49:58 +0100 Subject: macvlan: Support for high multicast packet rate Background: Broadcast and multicast packages are enqueued for later processing. This queue was previously hardcoded to 1000. This proved insufficient for handling very high packet rates. This resulted in packet drops for multicast. While at the same time unicast worked fine. The change: This patch make the queue length adjustable to accommodate for environments with very high multicast packet rate. But still keeps the default value of 1000 unless specified. The queue length is specified as a request per macvlan using the IFLA_MACVLAN_BC_QUEUE_LEN parameter. The actual used queue length will then be the maximum of any macvlan connected to the same port. The actual used queue length for the port can be retrieved (read only) by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification. This will be followed up by a patch to iproute2 in order to adjust the parameter from userspace. Signed-off-by: Thomas Karlsson Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se Signed-off-by: Jakub Kicinski --- include/linux/if_macvlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a367ead4bf4b..96556c64c95d 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -30,6 +30,7 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; unsigned int macaddr_count; + u32 bc_queue_len_req; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif -- cgit v1.2.3 From d421e466c2373095f165ddd25cbabd6c5b077928 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 2 Dec 2020 20:39:46 -0800 Subject: net/mlx5: DR, Proper handling of unsupported Connect-X6DX SW steering STEs format for Connect-X5 and Connect-X6DX different. Currently, on Connext-X6DX the SW steering would break at some point when building STEs w/o giving a proper error message. Fix this by checking the STE format of the current device when initializing domain: add mlx5_ifc definitions for Connect-X6DX SW steering, read FW capability to get the current format version, and check this version when domain is being created. Fixes: 26d688e33f88 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a092346c7b2d..233352447b1a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1223,6 +1223,11 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +enum { + MLX5_STEERING_FORMAT_CONNECTX_5 = 0, + MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1521,7 +1526,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x20]; + u8 reserved_at_440[0x4]; + u8 steering_format_version[0x4]; + u8 create_qp_start_hint[0x18]; u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; -- cgit v1.2.3 From 0fb6ee8d0b5e90b72f870f76debc8bd31a742014 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 24 Nov 2020 14:38:07 +0200 Subject: iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack Use a heap allocated memory for the SPI transfer buffer. Using stack memory can corrupt stack memory when using DMA on some systems. This change moves the buffer from the stack of the trigger handler call to the heap of the buffer of the state struct. The size increases takes into account the alignment for the timestamp, which is 8 bytes. The 'data' buffer is split into 'tx_buf' and 'rx_buf', to make a clearer separation of which part of the buffer should be used for TX & RX. Fixes: af3008485ea03 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201124123807.19717-1-alexandru.ardelean@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index a3a838dcf8e4..7199280d89ca 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -79,8 +79,12 @@ struct ad_sigma_delta { /* * DMA (thus cache coherency maintenance) requires the * transfer buffers to live in their own cache lines. + * 'tx_buf' is up to 32 bits. + * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, + * rounded to 16 bytes to take into account padding. */ - uint8_t data[4] ____cacheline_aligned; + uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t rx_buf[16] __aligned(8); }; static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, -- cgit v1.2.3 From eca8523a388f65cc0f515e3db4f3b027d7546532 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 20 Sep 2020 14:25:48 +0100 Subject: iio:trigger: rename try_reenable() to reenable() plus return void As we no longer support a try again if we cannot reenable the trigger rename the function to reflect this. Also we don't do anything with the value returned so stop it returning anything. For the few drivers that didn't already print an error message in this patch, add such a print. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen Acked-by: Linus Walleij Acked-by: Srinivas Pandruvada Cc: Linus Walleij Cc: Srinivas Pandruvada Cc: Christian Oder Cc: Eugen Hristev Cc: Nishant Malpani Cc: Daniel Baluta Link: https://lore.kernel.org/r/20200920132548.196452-3-jic23@kernel.org --- include/linux/iio/trigger.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index f930c4ccf378..055890b6ffcf 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -21,7 +21,7 @@ struct iio_trigger; /** * struct iio_trigger_ops - operations structure for an iio_trigger. * @set_trigger_state: switch on/off the trigger on demand - * @try_reenable: function to reenable the trigger when the + * @reenable: function to reenable the trigger when the * use count is zero (may be NULL) * @validate_device: function to validate the device when the * current trigger gets changed. @@ -31,7 +31,7 @@ struct iio_trigger; **/ struct iio_trigger_ops { int (*set_trigger_state)(struct iio_trigger *trig, bool state); - int (*try_reenable)(struct iio_trigger *trig); + void (*reenable)(struct iio_trigger *trig); int (*validate_device)(struct iio_trigger *trig, struct iio_dev *indio_dev); }; -- cgit v1.2.3 From 41dd9596d6b239a125c3d19f9d0ca90bdbfbf876 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Nov 2020 16:36:29 +0100 Subject: security: add const qualifier to struct sock in various places A followup change to tcp_request_sock_op would have to drop the 'const' qualifier from the 'route_req' function as the 'security_inet_conn_request' call is moved there - and that function expects a 'struct sock *'. However, it turns out its also possible to add a const qualifier to security_inet_conn_request instead. Signed-off-by: Florian Westphal Acked-by: James Morris Signed-off-by: Jakub Kicinski --- include/linux/lsm_audit.h | 2 +- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 28f23b341c1c..cd23355d2271 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,7 +26,7 @@ struct lsm_network_audit { int netif; - struct sock *sk; + const struct sock *sk; u16 family; __be16 dport; __be16 sport; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 32a940117e7a..acc0494cceba 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -301,7 +301,7 @@ LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, struct sock *newsk) LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) -LSM_HOOK(int, 0, inet_conn_request, struct sock *sk, struct sk_buff *skb, +LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb, struct request_sock *req) LSM_HOOK(void, LSM_RET_VOID, inet_csk_clone, struct sock *newsk, const struct request_sock *req) diff --git a/include/linux/security.h b/include/linux/security.h index bc2725491560..0df62735651b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1358,7 +1358,7 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk); void security_sk_classify_flow(struct sock *sk, struct flowi *fl); void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); void security_sock_graft(struct sock*sk, struct socket *parent); -int security_inet_conn_request(struct sock *sk, +int security_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req); @@ -1519,7 +1519,7 @@ static inline void security_sock_graft(struct sock *sk, struct socket *parent) { } -static inline int security_inet_conn_request(struct sock *sk, +static inline int security_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { return 0; -- cgit v1.2.3 From a15ac665b9e9c90b1557499f2a46c1e89d29154a Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 3 Dec 2020 22:35:11 +0100 Subject: vfio-mdev: Wire in a request handler for mdev parent While performing some destructive tests with vfio-ccw, where the paths to a device are forcible removed and thus the device itself is unreachable, it is rather easy to end up in an endless loop in vfio_del_group_dev() due to the lack of a request callback for the associated device. In this example, one MDEV (77c) is used by a guest, while another (77b) is not. The symptom is that the iommu is detached from the mdev for 77b, but not 77c, until that guest is shutdown: [ 238.794867] vfio_ccw 0.0.077b: MDEV: Unregistering [ 238.794996] vfio_mdev 11f2d2bc-4083-431d-a023-eff72715c4f0: Removing from iommu group 2 [ 238.795001] vfio_mdev 11f2d2bc-4083-431d-a023-eff72715c4f0: MDEV: detaching iommu [ 238.795036] vfio_ccw 0.0.077c: MDEV: Unregistering ...silence... Let's wire in the request call back to the mdev device, so that a device being physically removed from the host can be (gracefully?) handled by the parent device at the time the device is removed. Add a message when registering the device if a driver doesn't provide this callback, so a clue is given that this same loop may be encountered in a similar situation, and a message when this occurs instead of the awkward silence noted above. Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- include/linux/mdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 0ce30ca78db0..9004375c462e 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -72,6 +72,9 @@ struct device *mdev_get_iommu_device(struct device *dev); * @mmap: mmap callback * @mdev: mediated device structure * @vma: vma structure + * @request: request callback to release device + * @mdev: mediated device structure + * @count: request sequence number * Parent device that support mediated device should be registered with mdev * module with mdev_parent_ops structure. **/ @@ -92,6 +95,7 @@ struct mdev_parent_ops { long (*ioctl)(struct mdev_device *mdev, unsigned int cmd, unsigned long arg); int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); + void (*request)(struct mdev_device *mdev, unsigned int count); }; /* interface for exporting mdev supported type attributes */ -- cgit v1.2.3 From 22dc4a0f5ed11b6dc8fd73a0892fa0ea1a4c3cdf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:29 -0800 Subject: bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead, wherever BTF type IDs are involved, also track the instance of struct btf that goes along with the type ID. This allows to gradually add support for kernel module BTFs and using/tracking module types across BPF helper calls and registers. This patch also renames btf_id() function to btf_obj_id() to minimize naming clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID. Also, altough btf_vmlinux can't get destructed and thus doesn't need refcounting, module BTFs need that, so apply BTF refcounting universally when BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This makes for simpler clean up code. Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF trampoline key to include BTF object ID. To differentiate that from target program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are not allowed to take full 32 bits, so there is no danger of confusing that bit with a valid BTF type ID. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org --- include/linux/bpf.h | 13 +++++++++---- include/linux/bpf_verifier.h | 28 +++++++++++++++++++++------- include/linux/btf.h | 5 ++++- 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9de5711b23f..d05e75ed8c1b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -421,7 +421,10 @@ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; union { int ctx_field_size; - u32 btf_id; + struct { + struct btf *btf; + u32 btf_id; + }; }; struct bpf_verifier_log *log; /* for verbose logs */ }; @@ -458,6 +461,7 @@ struct bpf_verifier_ops { struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, + const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); @@ -771,6 +775,7 @@ struct bpf_prog_aux { u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; + struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; @@ -1005,7 +1010,6 @@ struct bpf_event_entry { bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); -const char *kernel_type_name(u32 btf_type_id); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -1450,12 +1454,13 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); -int btf_struct_access(struct bpf_verifier_log *log, +int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); bool btf_struct_ids_match(struct bpf_verifier_log *log, - int off, u32 id, u32 need_type_id); + const struct btf *btf, u32 id, int off, + const struct btf *need_btf, u32 need_type_id); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 306869d4743b..e941fe1484e5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -5,6 +5,7 @@ #define _LINUX_BPF_VERIFIER_H 1 #include /* for enum bpf_reg_type */ +#include /* for struct btf and btf_id() */ #include /* for MAX_BPF_STACK */ #include @@ -43,6 +44,8 @@ enum bpf_reg_liveness { struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; + /* Fixed part of pointer offset, pointer types only */ + s32 off; union { /* valid when type == PTR_TO_PACKET */ int range; @@ -52,15 +55,20 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; - u32 btf_id; /* for PTR_TO_BTF_ID */ + /* for PTR_TO_BTF_ID */ + struct { + struct btf *btf; + u32 btf_id; + }; u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ /* Max size from any of the above. */ - unsigned long raw; + struct { + unsigned long raw1; + unsigned long raw2; + } raw; }; - /* Fixed part of pointer offset, pointer types only */ - s32 off; /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we @@ -311,7 +319,10 @@ struct bpf_insn_aux_data { struct { enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ union { - u32 btf_id; /* btf_id for struct typed var */ + struct { + struct btf *btf; + u32 btf_id; /* btf_id for struct typed var */ + }; u32 mem_size; /* mem_size for non-struct typed var */ }; } btf_var; @@ -459,9 +470,12 @@ int check_ctx_reg(struct bpf_verifier_env *env, /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, - u32 btf_id) + struct btf *btf, u32 btf_id) { - return tgt_prog ? (((u64)tgt_prog->aux->id) << 32 | btf_id) : btf_id; + if (tgt_prog) + return ((u64)tgt_prog->aux->id << 32) | btf_id; + else + return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id; } int bpf_check_attach_target(struct bpf_verifier_log *log, diff --git a/include/linux/btf.h b/include/linux/btf.h index 2bf641829664..fb608e4de076 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -18,6 +18,7 @@ struct btf_show; extern const struct file_operations btf_fops; +void btf_get(struct btf *btf); void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr); struct btf *btf_get_by_fd(int fd); @@ -88,7 +89,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, char *buf, int len, u64 flags); int btf_get_fd_by_id(u32 id); -u32 btf_id(const struct btf *btf); +u32 btf_obj_id(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); @@ -206,6 +207,8 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo( } #ifdef CONFIG_BPF_SYSCALL +struct bpf_prog; + const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); -- cgit v1.2.3 From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:30 -0800 Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs Add ability for user-space programs to specify non-vmlinux BTF when attaching BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this, attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD of a module or vmlinux BTF object. For backwards compatibility reasons, 0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index fb608e4de076..4c200f5d242b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -90,6 +90,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); +bool btf_is_kernel(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -- cgit v1.2.3 From 6df3e14436f6ee254b1a4952d90ee8988be59c89 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:02 +0000 Subject: psci: Add accessor for psci_0_1_function_ids Make it possible to retrieve a copy of the psci_0_1_function_ids struct. This is useful for KVM if it is configured to intercept host's PSCI SMCs. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20201202184122.26046-7-dbrazdil@google.com --- include/linux/psci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index 2a1bfb890e58..4ca0060a3fc4 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -34,6 +34,15 @@ struct psci_operations { extern struct psci_operations psci_ops; +struct psci_0_1_function_ids { + u32 cpu_suspend; + u32 cpu_on; + u32 cpu_off; + u32 migrate; +}; + +struct psci_0_1_function_ids get_psci_0_1_function_ids(void); + #if defined(CONFIG_ARM_PSCI_FW) int __init psci_dt_init(void); #else -- cgit v1.2.3 From 7de3697e9cbd4bd3d62bafa249d57990e1b8f294 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Wed, 2 Dec 2020 16:54:24 -0800 Subject: Add auxiliary bus support Add support for the Auxiliary Bus, auxiliary_device and auxiliary_driver. It enables drivers to create an auxiliary_device and bind an auxiliary_driver to it. The bus supports probe/remove shutdown and suspend/resume callbacks. Each auxiliary_device has a unique string based id; driver binds to an auxiliary_device based on this id through the bus. Co-developed-by: Kiran Patil Co-developed-by: Ranjani Sridharan Co-developed-by: Fred Oh Co-developed-by: Leon Romanovsky Signed-off-by: Kiran Patil Signed-off-by: Ranjani Sridharan Signed-off-by: Fred Oh Signed-off-by: Leon Romanovsky Signed-off-by: Dave Ertman Reviewed-by: Pierre-Louis Bossart Reviewed-by: Shiraz Saleem Reviewed-by: Parav Pandit Reviewed-by: Dan Williams Reviewed-by: Martin Habets Link: https://lore.kernel.org/r/20201113161859.1775473-2-david.m.ertman@intel.com Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/160695681289.505290.8978295443574440604.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 78 +++++++++++++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 8 +++++ 2 files changed, 86 insertions(+) create mode 100644 include/linux/auxiliary_bus.h (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h new file mode 100644 index 000000000000..282fbf7bf9af --- /dev/null +++ b/include/linux/auxiliary_bus.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019-2020 Intel Corporation + * + * Please see Documentation/driver-api/auxiliary_bus.rst for more information. + */ + +#ifndef _AUXILIARY_BUS_H_ +#define _AUXILIARY_BUS_H_ + +#include +#include +#include + +struct auxiliary_device { + struct device dev; + const char *name; + u32 id; +}; + +struct auxiliary_driver { + int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); + int (*remove)(struct auxiliary_device *auxdev); + void (*shutdown)(struct auxiliary_device *auxdev); + int (*suspend)(struct auxiliary_device *auxdev, pm_message_t state); + int (*resume)(struct auxiliary_device *auxdev); + const char *name; + struct device_driver driver; + const struct auxiliary_device_id *id_table; +}; + +static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) +{ + return container_of(dev, struct auxiliary_device, dev); +} + +static inline struct auxiliary_driver *to_auxiliary_drv(struct device_driver *drv) +{ + return container_of(drv, struct auxiliary_driver, driver); +} + +int auxiliary_device_init(struct auxiliary_device *auxdev); +int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname); +#define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME) + +static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev) +{ + put_device(&auxdev->dev); +} + +static inline void auxiliary_device_delete(struct auxiliary_device *auxdev) +{ + device_del(&auxdev->dev); +} + +int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, + const char *modname); +#define auxiliary_driver_register(auxdrv) \ + __auxiliary_driver_register(auxdrv, THIS_MODULE, KBUILD_MODNAME) + +void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); + +/** + * module_auxiliary_driver() - Helper macro for registering an auxiliary driver + * @__auxiliary_driver: auxiliary driver struct + * + * Helper macro for auxiliary drivers which do not do anything special in + * module init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_auxiliary_driver(__auxiliary_driver) \ + module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) + +struct auxiliary_device * +auxiliary_find_device(struct device *start, const void *data, + int (*match)(struct device *dev, const void *data)); + +#endif /* _AUXILIARY_BUS_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 5b08a473cdba..c425290b21e2 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -838,4 +838,12 @@ struct mhi_device_id { kernel_ulong_t driver_data; }; +#define AUXILIARY_NAME_SIZE 32 +#define AUXILIARY_MODULE_PREFIX "auxiliary:" + +struct auxiliary_device_id { + char name[AUXILIARY_NAME_SIZE]; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 30ae3e13caeaa47884c222ebf5711ce27ed25f19 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 25 Nov 2020 22:29:59 +0100 Subject: mmc: tmio: set max_busy_timeout Set max_busy_timeouts for variants known to support the TOPxx bits in the SD_OPTION register. The timeout mechanism was running in the background but not yet properly handled in the driver. So, let the MMC core know when to not use R1B to avoid unhandled timeouts. My datasheets for older variants (tmio_mmc.c) suggest that they support it, too. However, actual bit descriptions are lacking, so I chose an opt-in approach. Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201125213001.15003-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8ba042430d8e..27264fe4b3b9 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -55,7 +55,12 @@ */ #define TMIO_MMC_HAS_IDLE_WAIT BIT(4) -/* BIT(5) is unused */ +/* + * Use the busy timeout feature. Probably all TMIO versions support it. Yet, + * we don't have documentation for old variants, so we enable only known good + * variants with this flag. Can be removed once all variants are known good. + */ +#define TMIO_MMC_USE_BUSY_TIMEOUT BIT(5) /* * Some controllers have CMD12 automatically -- cgit v1.2.3 From 7bbb79ff5f7499e0c5d65987458410e8099207d8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 12:43:47 +0100 Subject: driver core: auxiliary bus: move slab.h from include file No need to include slab.h in include/linux/auxiliary_bus.h, as it is not needed there. Move it to drivers/base/auxiliary.c instead. Cc: Dan Williams Cc: Dave Ertman Cc: Fred Oh Cc: Kiran Patil Cc: Leon Romanovsky Cc: Martin Habets Cc: Parav Pandit Cc: Pierre-Louis Bossart Cc: Ranjani Sridharan Cc: Shiraz Saleem Link: https://lore.kernel.org/r/X8og8xi3WkoYXet9@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 282fbf7bf9af..3580743d0e8d 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -10,7 +10,6 @@ #include #include -#include struct auxiliary_device { struct device dev; -- cgit v1.2.3 From 8142a46c50d2dd8160c42284e1044eed3bec0d18 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 12:44:07 +0100 Subject: driver core: auxiliary bus: make remove function return void There's an effort to move the remove() callback in the driver core to not return an int, as nothing can be done if this function fails. To make that effort easier, make the aux bus remove function void to start with so that no users have to be changed sometime in the future. Cc: Dan Williams Cc: Dave Ertman Cc: Fred Oh Cc: Kiran Patil Cc: Leon Romanovsky Cc: Martin Habets Cc: Parav Pandit Cc: Pierre-Louis Bossart Cc: Ranjani Sridharan Cc: Shiraz Saleem Link: https://lore.kernel.org/r/X8ohB1ks1NK7kPop@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 3580743d0e8d..d67b17606210 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -19,7 +19,7 @@ struct auxiliary_device { struct auxiliary_driver { int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); - int (*remove)(struct auxiliary_device *auxdev); + void (*remove)(struct auxiliary_device *auxdev); void (*shutdown)(struct auxiliary_device *auxdev); int (*suspend)(struct auxiliary_device *auxdev, pm_message_t state); int (*resume)(struct auxiliary_device *auxdev); -- cgit v1.2.3 From 0d2bf11a6b3e275a526b8d42d8d4a3a6067cf953 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 12:49:28 +0100 Subject: driver core: auxiliary bus: minor coding style tweaks For some reason, the original aux bus patch had some really long lines in a few places, probably due to it being a very long-lived patch in development by many different people. Fix that up so that the two files all have the same length lines and function formatting styles. Cc: Dan Williams Cc: Dave Ertman Cc: Fred Oh Cc: Kiran Patil Cc: Leon Romanovsky Cc: Martin Habets Cc: Parav Pandit Cc: Pierre-Louis Bossart Cc: Ranjani Sridharan Cc: Shiraz Saleem Link: https://lore.kernel.org/r/X8oiSFTpYHw1xE/o@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index d67b17606210..fc51d45f106b 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -70,8 +70,8 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) -struct auxiliary_device * -auxiliary_find_device(struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); +struct auxiliary_device *auxiliary_find_device(struct device *start, + const void *data, + int (*match)(struct device *dev, const void *data)); #endif /* _AUXILIARY_BUS_H_ */ -- cgit v1.2.3 From 17a7612b99e66d2539341ab4f888f970c2c7f76d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 4 Oct 2020 14:30:58 +0300 Subject: net/mlx5_core: Clean driver version and name Remove exposed driver version as it was done in other drivers, so module version will work correctly by displaying the kernel version for which it is compiled. And move mlx5_core module name to general include, so auxiliary drivers will be able to use it as a basis for a name in their device ID tables. Reviewed-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0f23e1ed5e71..5f04495c33d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -56,6 +56,8 @@ #include #include +#define MLX5_ADEV_NAME "mlx5_core" + enum { MLX5_BOARD_ID_LEN = 64, }; -- cgit v1.2.3 From 0aae392bea4da1a2a9f2e22683c0fa751dc07333 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 6 Oct 2020 12:34:25 +0300 Subject: vdpa/mlx5: Make hardware definitions visible to all mlx5 devices Move mlx5_vdpa IFC header file to the general include folder, so mlx5_core will be able to reuse it to check if VDPA is supported prior to creating an auxiliary device. As part of this move, update the header file name to mlx5 general naming scheme. Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc_vdpa.h | 166 +++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 include/linux/mlx5/mlx5_ifc_vdpa.h (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h new file mode 100644 index 000000000000..98b56b75c625 --- /dev/null +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_IFC_VDPA_H_ +#define __MLX5_IFC_VDPA_H_ + +enum { + MLX5_VIRTIO_Q_EVENT_MODE_NO_MSIX_MODE = 0x0, + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE = 0x1, + MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, +}; + +struct mlx5_ifc_virtio_q_bits { + u8 virtio_q_type[0x8]; + u8 reserved_at_8[0x5]; + u8 event_mode[0x3]; + u8 queue_index[0x10]; + + u8 full_emulation[0x1]; + u8 virtio_version_1_0[0x1]; + u8 reserved_at_22[0x2]; + u8 offload_type[0x4]; + u8 event_qpn_or_msix[0x18]; + + u8 doorbell_stride_index[0x10]; + u8 queue_size[0x10]; + + u8 device_emulation_id[0x20]; + + u8 desc_addr[0x40]; + + u8 used_addr[0x40]; + + u8 available_addr[0x40]; + + u8 virtio_q_mkey[0x20]; + + u8 max_tunnel_desc[0x10]; + u8 reserved_at_170[0x8]; + u8 error_type[0x8]; + + u8 umem_1_id[0x20]; + + u8 umem_1_size[0x20]; + + u8 umem_1_offset[0x40]; + + u8 umem_2_id[0x20]; + + u8 umem_2_size[0x20]; + + u8 umem_2_offset[0x40]; + + u8 umem_3_id[0x20]; + + u8 umem_3_size[0x20]; + + u8 umem_3_offset[0x40]; + + u8 counter_set_id[0x20]; + + u8 reserved_at_320[0x8]; + u8 pd[0x18]; + + u8 reserved_at_340[0xc0]; +}; + +struct mlx5_ifc_virtio_net_q_object_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x20]; + + u8 vhca_id[0x10]; + u8 reserved_at_70[0x10]; + + u8 queue_feature_bit_mask_12_3[0xa]; + u8 dirty_bitmap_dump_enable[0x1]; + u8 vhost_log_page[0x5]; + u8 reserved_at_90[0xc]; + u8 state[0x4]; + + u8 reserved_at_a0[0x5]; + u8 queue_feature_bit_mask_2_0[0x3]; + u8 tisn_or_qpn[0x18]; + + u8 dirty_bitmap_mkey[0x20]; + + u8 dirty_bitmap_size[0x20]; + + u8 dirty_bitmap_addr[0x40]; + + u8 hw_available_index[0x10]; + u8 hw_used_index[0x10]; + + u8 reserved_at_160[0xa0]; + + struct mlx5_ifc_virtio_q_bits virtio_q_context; +}; + +struct mlx5_ifc_create_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_create_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +enum { + MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, +}; + +enum { + MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT = 0x0, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY = 0x1, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND = 0x2, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, +}; + +enum { + MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, + MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, +}; + +struct mlx5_ifc_modify_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_modify_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +#endif /* __MLX5_IFC_VDPA_H_ */ -- cgit v1.2.3 From a925b5e309c9b998658a6a94dbb53154ea901299 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 8 Oct 2020 16:06:37 +0300 Subject: net/mlx5: Register mlx5 devices to auxiliary virtual bus Create auxiliary devices under new virtual bus. This will replace the custom-made mlx5 ->add()/->remove() interfaces and next patches will fill the missing callback and remove the old interface logic. The attachment of auxiliary drivers to the devices is possible in 1-to-1 manner only and it requires us to create device for every protocol, so that device (module) will be able to connect to it. System with 2 IB and 1 RoCE cards: [leonro@vm ~]$ lspci |grep nox 00:09.0 Ethernet controller: Mellanox Technologies MT27800 Family [ConnectX-5] 00:0a.0 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6] 00:0b.0 Ethernet controller: Mellanox Technologies MT2910 Family [ConnectX-7] [leonro@vm ~]$ ls -l /sys/bus/auxiliary/devices/ mlx5_core.eth.2 -> ../../../devices/pci0000:00/0000:00:0b.0/mlx5_core.eth.2 mlx5_core.rdma.0 -> ../../../devices/pci0000:00/0000:00:09.0/mlx5_core.rdma.0 mlx5_core.rdma.1 -> ../../../devices/pci0000:00/0000:00:0a.0/mlx5_core.rdma.1 mlx5_core.rdma.2 -> ../../../devices/pci0000:00/0000:00:0b.0/mlx5_core.rdma.2 mlx5_core.vdpa.1 -> ../../../devices/pci0000:00/0000:00:0a.0/mlx5_core.vdpa.1 mlx5_core.vdpa.2 -> ../../../devices/pci0000:00/0000:00:0b.0/mlx5_core.vdpa.2 [leonro@vm ~]$ rdma dev 0: ibp0s9: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3455 sys_image_guid 5254:00c0:fe12:3455 1: ibp0s10: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3456 sys_image_guid 5254:00c0:fe12:3456 2: rdmap0s11: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3457 sys_image_guid 5254:00c0:fe12:3457 System with RoCE SR-IOV card with 4 VFs: [leonro@vm ~]$ lspci |grep nox 01:00.0 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6] 01:00.1 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] 01:00.2 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] 01:00.3 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] 01:00.4 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] [leonro@vm ~]$ ls -l /sys/bus/auxiliary/devices/ mlx5_core.eth.0 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.0/mlx5_core.eth.0 mlx5_core.eth.1 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.1/mlx5_core.eth.1 mlx5_core.eth.2 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.2/mlx5_core.eth.2 mlx5_core.eth.3 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.3/mlx5_core.eth.3 mlx5_core.eth.4 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.4/mlx5_core.eth.4 mlx5_core.rdma.0 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.0/mlx5_core.rdma.0 mlx5_core.rdma.1 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.1/mlx5_core.rdma.1 mlx5_core.rdma.2 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.2/mlx5_core.rdma.2 mlx5_core.rdma.3 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.3/mlx5_core.rdma.3 mlx5_core.rdma.4 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.4/mlx5_core.rdma.4 mlx5_core.vdpa.1 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.1/mlx5_core.vdpa.1 mlx5_core.vdpa.2 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.2/mlx5_core.vdpa.2 mlx5_core.vdpa.3 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.3/mlx5_core.vdpa.3 mlx5_core.vdpa.4 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.4/mlx5_core.vdpa.4 [leonro@vm ~]$ rdma dev 0: rocep1s0f0: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3455 sys_image_guid 5254:00c0:fe12:3455 1: rocep1s0f0v0: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3456 2: rocep1s0f0v1: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3457 3: rocep1s0f0v2: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3458 4: rocep1s0f0v3: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3459 Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5f04495c33d7..e31c72693bcf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -536,6 +537,17 @@ struct mlx5_core_roce { struct mlx5_flow_handle *allow_rule; }; +enum { + MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0, + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1, +}; + +struct mlx5_adev { + struct auxiliary_device adev; + struct mlx5_core_dev *mdev; + int idx; +}; + struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ struct mlx5_irq_table *irq_table; @@ -573,6 +585,8 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_adev **adev; + int adev_idx; struct mlx5_events *events; struct mlx5_flow_steering *steering; @@ -580,6 +594,7 @@ struct mlx5_priv { struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; struct mlx5_lag *lag; + u32 flags; struct mlx5_devcom *devcom; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; @@ -1062,9 +1077,14 @@ enum { }; enum { - MLX5_INTERFACE_PROTOCOL_IB = 0, - MLX5_INTERFACE_PROTOCOL_ETH = 1, - MLX5_INTERFACE_PROTOCOL_VDPA = 2, + MLX5_INTERFACE_PROTOCOL_ETH_REP, + MLX5_INTERFACE_PROTOCOL_ETH, + + MLX5_INTERFACE_PROTOCOL_IB_REP, + MLX5_INTERFACE_PROTOCOL_MPIB, + MLX5_INTERFACE_PROTOCOL_IB, + + MLX5_INTERFACE_PROTOCOL_VDPA, }; struct mlx5_interface { -- cgit v1.2.3 From 62dcd9c59f324e484c1d655884e0101a988f6671 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:13 +0100 Subject: earlycon: simplify earlycon-table implementation Instead of using the array-of-pointers trick to avoid having gcc mess up the earlycon array stride, specify type alignment when declaring entries to prevent gcc from increasing alignment. This is essentially an alternative (one-line) fix to the problem addressed by commit dd709e72cb93 ("earlycon: Use a pointer table to fix __earlycon_table stride"). gcc can increase the alignment of larger objects with static extent as an optimisation, but this can be suppressed by using the aligned attribute when declaring variables. Note that we have been relying on this behaviour for kernel parameters for 16 years and it indeed hasn't changed since the introduction of the aligned attribute in gcc-3.1. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201123102319.8090-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index ff63c2963359..3e32b788c28d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -357,8 +357,8 @@ struct earlycon_id { int (*setup)(struct earlycon_device *, const char *options); }; -extern const struct earlycon_id *__earlycon_table[]; -extern const struct earlycon_id *__earlycon_table_end[]; +extern const struct earlycon_id __earlycon_table[]; +extern const struct earlycon_id __earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used @@ -366,19 +366,13 @@ extern const struct earlycon_id *__earlycon_table_end[]; #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif -#define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id) \ - static const struct earlycon_id unique_id \ - EARLYCON_USED_OR_UNUSED __initconst \ +#define OF_EARLYCON_DECLARE(_name, compat, fn) \ + static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ + EARLYCON_USED_OR_UNUSED __section("__earlycon_table") \ + __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ - .setup = fn }; \ - static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ - __section("__earlycon_table") \ - * const __PASTE(__p, unique_id) = &unique_id - -#define OF_EARLYCON_DECLARE(_name, compat, fn) \ - _OF_EARLYCON_DECLARE(_name, compat, fn, \ - __UNIQUE_ID(__earlycon_##_name)) + .setup = fn }; #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) -- cgit v1.2.3 From 5812b32e01c6d86ba7a84110702b46d8a8531fe9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:12 +0100 Subject: of: fix linker-section match-table corruption Specify type alignment when declaring linker-section match-table entries to prevent gcc from increasing alignment and corrupting the various tables with padding (e.g. timers, irqchips, clocks, reserved memory). This is specifically needed on x86 where gcc (typically) aligns larger objects like struct of_device_id with static extent on 32-byte boundaries which at best prevents matching on anything but the first entry. Specifying alignment when declaring variables suppresses this optimisation. Here's a 64-bit example where all entries are corrupt as 16 bytes of padding has been inserted before the first entry: ffffffff8266b4b0 D __clk_of_table ffffffff8266b4c0 d __of_table_fixed_factor_clk ffffffff8266b5a0 d __of_table_fixed_clk ffffffff8266b680 d __clk_of_table_sentinel And here's a 32-bit example where the 8-byte-aligned table happens to be placed on a 32-byte boundary so that all but the first entry are corrupt due to the 28 bytes of padding inserted between entries: 812b3ec0 D __irqchip_of_table 812b3ec0 d __of_table_irqchip1 812b3fa0 d __of_table_irqchip2 812b4080 d __of_table_irqchip3 812b4160 d irqchip_of_match_end Verified on x86 using gcc-9.3 and gcc-4.9 (which uses 64-byte alignment), and on arm using gcc-7.2. Note that there are no in-tree users of these tables on x86 currently (even if they are included in the image). Fixes: 54196ccbe0ba ("of: consolidate linker section OF match table declarations") Fixes: f6e916b82022 ("irqchip: add basic infrastructure") Cc: stable # 3.9 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201123102319.8090-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/of.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 5d51891cbf1a..af655d264f10 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1300,6 +1300,7 @@ static inline int of_get_available_child_count(const struct device_node *np) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section("__" #table "_of_table") \ + __aligned(__alignof__(struct of_device_id)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else -- cgit v1.2.3 From e0efb3168d34dc8c8c72718672b8902e40efff8f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 3 Dec 2020 03:03:31 +0100 Subject: tty: Remove dead termiox code set_termiox() and the TCGETX handler bail out with -EINVAL immediately if ->termiox is NULL, but there are no code paths that can set ->termiox to a non-NULL pointer; and no such code paths seem to have existed since the termiox mechanism was introduced back in commit 1d65b4a088de ("tty: Add termiox") in v2.6.28. Similarly, no driver actually implements .set_termiox; and it looks like no driver ever has. Delete this dead code; but leave the definition of struct termiox in the UAPI headers intact. Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20201203020331.2394754-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 - include/linux/tty_driver.h | 9 --------- 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 10212c6e4345..67c7a07c8083 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -303,7 +303,6 @@ struct tty_struct { spinlock_t flow_lock; /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; - struct termiox *termiox; /* May be NULL for unsupported */ char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ struct pid *session; diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 358446247ccd..61c3372d3f32 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -224,14 +224,6 @@ * line). See tty_do_resize() if you need to wrap the standard method * in your own logic - the usual case. * - * void (*set_termiox)(struct tty_struct *tty, struct termiox *new); - * - * Called when the device receives a termiox based ioctl. Passes down - * the requested data from user space. This method will not be invoked - * unless the tty also has a valid tty->termiox pointer. - * - * Optional: Called under the termios lock - * * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount); * * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel @@ -285,7 +277,6 @@ struct tty_operations { int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); int (*resize)(struct tty_struct *tty, struct winsize *ws); - int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); int (*get_serial)(struct tty_struct *tty, struct serial_struct *p); -- cgit v1.2.3 From c8bcd9c5be24fb9e6132e97da5a35e55a83e36b9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 3 Dec 2020 02:25:05 +0100 Subject: tty: Fix ->session locking Currently, locking of ->session is very inconsistent; most places protect it using the legacy tty mutex, but disassociate_ctty(), __do_SAK(), tiocspgrp() and tiocgsid() don't. Two of the writers hold the ctrl_lock (because they already need it for ->pgrp), but __proc_set_tty() doesn't do that yet. On a PREEMPT=y system, an unprivileged user can theoretically abuse this broken locking to read 4 bytes of freed memory via TIOCGSID if tiocgsid() is preempted long enough at the right point. (Other things might also go wrong, especially if root-only ioctls are involved; I'm not sure about that.) Change the locking on ->session such that: - tty_lock() is held by all writers: By making disassociate_ctty() hold it. This should be fine because the same lock can already be taken through the call to tty_vhangup_session(). The tricky part is that we need to shorten the area covered by siglock to be able to take tty_lock() without ugly retry logic; as far as I can tell, this should be fine, since nothing in the signal_struct is touched in the `if (tty)` branch. - ctrl_lock is held by all writers: By changing __proc_set_tty() to hold the lock a little longer. - All readers that aren't holding tty_lock() hold ctrl_lock: By adding locking to tiocgsid() and __do_SAK(), and expanding the area covered by ctrl_lock in tiocspgrp(). Cc: stable@kernel.org Signed-off-by: Jann Horn Reviewed-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index a99e9b8e4e31..eb33d948788c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -306,6 +306,10 @@ struct tty_struct { struct termiox *termiox; /* May be NULL for unsupported */ char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ + /* + * Writes protected by both ctrl lock and legacy mutex, readers must use + * at least one of them. + */ struct pid *session; unsigned long flags; int count; -- cgit v1.2.3 From a54895fa057c67700270777f7661d8d3c7fda88a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2020 17:21:39 +0100 Subject: block: remove the request_queue to argument request based tracepoints The request_queue can trivially be derived from the request. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b6ff5902edc..05556573b896 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -75,8 +75,7 @@ static inline bool blk_trace_note_message_enabled(struct request_queue *q) return ret; } -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); @@ -90,7 +89,7 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) -- cgit v1.2.3 From c9d659b60770db94b898f94947192a94bbf95c5c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 20 Nov 2020 16:10:23 -0800 Subject: PCI/ERR: Bind RCEC devices to the Root Port driver If a Root Complex Integrated Endpoint (RCiEP) is implemented, it may signal errors through a Root Complex Event Collector (RCEC). Each RCiEP must be associated with no more than one RCEC. For an RCEC (which is technically not a Bridge), error messages "received" from associated RCiEPs must be enabled for "transmission" in order to cause a System Error via the Root Control register or (when the Advanced Error Reporting Capability is present) reporting via the Root Error Command register and logging in the Root Error Status register and Error Source Identification register. Given the commonality with Root Ports and the need to also support AER and PME services for RCECs, extend the Root Port driver to support RCEC devices by adding the RCEC Class ID to the driver structure. Co-developed-by: Sean V Kelley Link: https://lore.kernel.org/r/20201121001036.8560-3-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Qiuxu Zhuo Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1ab1e24bcbce..d8156a5dbee8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -81,6 +81,7 @@ #define PCI_CLASS_SYSTEM_RTC 0x0803 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 #define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_RCEC 0x0807 #define PCI_CLASS_SYSTEM_OTHER 0x0880 #define PCI_BASE_CLASS_INPUT 0x09 -- cgit v1.2.3 From 90655631988f8f501529e6de5f13614389717ead Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:24 -0800 Subject: PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities() Extend support for Root Complex Event Collectors by decoding and caching the RCEC Endpoint Association Extended Capabilities when enumerating. Use that cached information for later error source reporting. See PCIe r5.0, sec 7.9.10. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-4-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..f8c927fd0602 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -304,6 +304,7 @@ struct pcie_link_state; struct pci_vpd; struct pci_sriov; struct pci_p2pdma; +struct rcec_ea; /* The pci_dev structure describes PCI devices */ struct pci_dev { @@ -326,6 +327,9 @@ struct pci_dev { #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ struct aer_stats *aer_stats; /* AER stats for this device */ +#endif +#ifdef CONFIG_PCIEPORTBUS + struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit v1.2.3 From 3ee16db390b42b8a21f2ad2ea2518f3469c6e532 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 30 Nov 2020 10:57:43 -0500 Subject: dm: fix IO splitting Commit 882ec4e609c1 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting") caused a couple regressions: 1) Using lcm_not_zero() when stacking chunk_sectors was a bug because chunk_sectors must reflect the most limited of all devices in the IO stack. 2) DM targets that set max_io_len but that do _not_ provide an .iterate_devices method no longer had there IO split properly. And commit 5091cdec56fa ("dm: change max_io_len() to use blk_max_size_offset()") also caused a regression where DM no longer supported varied (per target) IO splitting. The implication being the potential for severely reduced performance for IO stacks that use a DM target like dm-cache to hide performance limitations of a slower device (e.g. one that requires 4K IO splitting). Coming full circle: Fix all these issues by discontinuing stacking chunk_sectors up using ti->max_io_len in dm_calculate_queue_limits(), add optional chunk_sectors override argument to blk_max_size_offset() and update DM's max_io_len() to pass ti->max_io_len to its blk_max_size_offset() call. Passing in an optional chunk_sectors override to blk_max_size_offset() allows for code reuse of block's centralized calculation for max IO size based on provided offset and split boundary. Fixes: 882ec4e609c1 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting") Fixes: 5091cdec56fa ("dm: change max_io_len() to use blk_max_size_offset()") Cc: stable@vger.kernel.org Reported-by: John Dorminy Reported-by: Bruce Johnston Reported-by: Kirill Tkhai Reviewed-by: John Dorminy Signed-off-by: Mike Snitzer Reviewed-by: Jens Axboe --- include/linux/blkdev.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..24ae504cf77d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1073,11 +1073,12 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, * file system requests. */ static inline unsigned int blk_max_size_offset(struct request_queue *q, - sector_t offset) + sector_t offset, + unsigned int chunk_sectors) { - unsigned int chunk_sectors = q->limits.chunk_sectors; - - if (!chunk_sectors) + if (!chunk_sectors && q->limits.chunk_sectors) + chunk_sectors = q->limits.chunk_sectors; + else return q->limits.max_sectors; if (likely(is_power_of_2(chunk_sectors))) @@ -1101,7 +1102,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, req_op(rq) == REQ_OP_SECURE_ERASE) return blk_queue_get_max_sectors(q, req_op(rq)); - return min(blk_max_size_offset(q, offset), + return min(blk_max_size_offset(q, offset, 0), blk_queue_get_max_sectors(q, req_op(rq))); } -- cgit v1.2.3 From f646c2a0a6685a8a30a150509b112c911b8feff3 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Sun, 29 Nov 2020 22:16:26 +0530 Subject: PCI: Return u8 from pci_find_capability() and similar PCI Capabilities are linked in a list that must appear in the first 256 bytes of config space. Each capabilities list pointer is 8 bits. Change the return type of pci_find_capability() and supporting functions from int to u8 to match the specification. [bhelgaas: change other related interfaces, fix HyperTransport typos] Link: https://lore.kernel.org/r/20201129164626.12887-1-puranjay12@gmail.com Signed-off-by: Puranjay Mohan Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e007bc3e8b6e..e615f8abdd79 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1064,12 +1064,13 @@ void pci_sort_breadthfirst(void); /* Generic PCI functions exported to card drivers */ -int pci_find_capability(struct pci_dev *dev, int cap); -int pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); +u8 pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +u8 pci_find_capability(struct pci_dev *dev, int cap); +u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); +u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap); +u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); int pci_find_ext_capability(struct pci_dev *dev, int cap); int pci_find_next_ext_capability(struct pci_dev *dev, int pos, int cap); -int pci_find_ht_capability(struct pci_dev *dev, int ht_cap); -int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u64 pci_get_dsn(struct pci_dev *dev); @@ -1280,7 +1281,6 @@ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); /* Functions for PCI Hotplug drivers to use */ -int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge); unsigned int pci_rescan_bus(struct pci_bus *bus); void pci_lock_rescan_remove(void); @@ -1720,7 +1720,7 @@ static inline int __pci_register_driver(struct pci_driver *drv, static inline int pci_register_driver(struct pci_driver *drv) { return 0; } static inline void pci_unregister_driver(struct pci_driver *drv) { } -static inline int pci_find_capability(struct pci_dev *dev, int cap) +static inline u8 pci_find_capability(struct pci_dev *dev, int cap) { return 0; } static inline int pci_find_next_capability(struct pci_dev *dev, u8 post, int cap) -- cgit v1.2.3 From ee8b1c478a9fbce9c64151ee561c124c4dcd66be Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 4 Dec 2020 15:14:07 -0600 Subject: PCI: Return u16 from pci_find_ext_capability() and similar PCI Express Extended Capabilities are in config space between offsets 256 and 4K. These offsets all fit in 16 bits. Change the return type of pci_find_ext_capability() and supporting functions from int to u16 to match the specification. Many callers use "int", which is fine, but there's no need to store more than a u16. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e615f8abdd79..441e5753da0c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -381,7 +381,7 @@ struct pci_dev { struct pcie_link_state *link_state; /* ASPM link state */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ - int l1ss; /* L1SS Capability pointer */ + u16 l1ss; /* L1SS Capability pointer */ #endif unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ @@ -1069,8 +1069,8 @@ u8 pci_find_capability(struct pci_dev *dev, int cap); u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap); u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); -int pci_find_ext_capability(struct pci_dev *dev, int cap); -int pci_find_next_ext_capability(struct pci_dev *dev, int pos, int cap); +u16 pci_find_ext_capability(struct pci_dev *dev, int cap); +u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u64 pci_get_dsn(struct pci_dev *dev); -- cgit v1.2.3 From dba4a9256bb4d78ef89aaad5f49787aa27fcd5b4 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:04 +0100 Subject: net: Remove the err argument from sock_from_file Currently, the sock_from_file prototype takes an "err" pointer that is either not set or set to -ENOTSOCK IFF the returned socket is NULL. This makes the error redundant and it is ignored by a few callers. This patch simplifies the API by letting callers deduce the error based on whether the returned socket is NULL or not. Suggested-by: Al Viro Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Reviewed-by: KP Singh Link: https://lore.kernel.org/bpf/20201204113609.1850150-1-revest@google.com --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 0dcd51feef02..9e2324efc26a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -240,7 +240,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); struct socket *sockfd_lookup(int fd, int *err); -struct socket *sock_from_file(struct file *file, int *err); +struct socket *sock_from_file(struct file *file); #define sockfd_put(sock) fput(sock->file) int net_ratelimit(void); -- cgit v1.2.3 From 65f33b35722952fa076811d5686bfd8a611a80fa Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Dec 2020 17:21:03 -0500 Subject: block: fix incorrect branching in blk_max_size_offset() If non-zero 'chunk_sectors' is passed in to blk_max_size_offset() that override will be incorrectly ignored. Old blk_max_size_offset() branching, prior to commit 3ee16db390b4, must be used only if passed 'chunk_sectors' override is zero. Fixes: 3ee16db390b4 ("dm: fix IO splitting") Cc: stable@vger.kernel.org # 5.9 Reported-by: John Dorminy Signed-off-by: Mike Snitzer --- include/linux/blkdev.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24ae504cf77d..033eb5f73b65 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1076,10 +1076,12 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset, unsigned int chunk_sectors) { - if (!chunk_sectors && q->limits.chunk_sectors) - chunk_sectors = q->limits.chunk_sectors; - else - return q->limits.max_sectors; + if (!chunk_sectors) { + if (q->limits.chunk_sectors) + chunk_sectors = q->limits.chunk_sectors; + else + return q->limits.max_sectors; + } if (likely(is_power_of_2(chunk_sectors))) chunk_sectors -= offset & (chunk_sectors - 1); -- cgit v1.2.3 From 99efde6c9bb7b42eac0459876bf964fe08e5cef9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 25 Nov 2020 12:07:33 +0300 Subject: PCI/PM: Rename pci_wakeup_bus() to pci_resume_bus() A "wakeup" is a signal from a device telling the system that the device or the whole system should be awakened and made active. PCI devices are made active by "resuming" them. pci_wakeup_bus() is not involved with the wakeup signal; it *resumes* devices on a bus (possibly in response to a wakeup signal, but that's at a higher level). Rename pci_wakeup_bus() to pci_resume_bus() to better reflect what it does. No functional change intended. [bhelgaas: commit log, reorder before removal of pci_wakeup_event()] Link: https://lore.kernel.org/r/20201125090733.77782-2-mika.westerberg@linux.intel.com Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..9256ef2e4327 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1271,7 +1271,7 @@ bool pci_dev_run_wake(struct pci_dev *dev); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); bool pcie_relaxed_ordering_enabled(struct pci_dev *dev); -void pci_wakeup_bus(struct pci_bus *bus); +void pci_resume_bus(struct pci_bus *bus); void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state); /* For use by arch with custom probe code */ -- cgit v1.2.3 From ed9b25d1970a4787ac6a39c2091e63b127ecbfc1 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Sun, 15 Nov 2020 21:55:31 -0600 Subject: [SECURITY] fix namespaced fscaps when !CONFIG_SECURITY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Namespaced file capabilities were introduced in 8db6c34f1dbc . When userspace reads an xattr for a namespaced capability, a virtualized representation of it is returned if the caller is in a user namespace owned by the capability's owning rootid. The function which performs this virtualization was not hooked up if CONFIG_SECURITY=n. Therefore in that case the original xattr was shown instead of the virtualized one. To test this using libcap-bin (*1), $ v=$(mktemp) $ unshare -Ur setcap cap_sys_admin-eip $v $ unshare -Ur setcap -v cap_sys_admin-eip $v /tmp/tmp.lSiIFRvt8Y: OK "setcap -v" verifies the values instead of setting them, and will check whether the rootid value is set. Therefore, with this bug un-fixed, and with CONFIG_SECURITY=n, setcap -v will fail: $ v=$(mktemp) $ unshare -Ur setcap cap_sys_admin=eip $v $ unshare -Ur setcap -v cap_sys_admin=eip $v nsowner[got=1000, want=0],/tmp/tmp.HHDiOOl9fY differs in [] Fix this bug by calling cap_inode_getsecurity() in security_inode_getsecurity() instead of returning -EOPNOTSUPP, when CONFIG_SECURITY=n. *1 - note, if libcap is too old for getcap to have the '-n' option, then use verify-caps instead. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=209689 Cc: Hervé Guillemet Acked-by: Casey Schaufler Signed-off-by: Serge Hallyn Signed-off-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 0a0a03b36a3b..2befc0a25eb3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -864,7 +864,7 @@ static inline int security_inode_killpriv(struct dentry *dentry) static inline int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc) { - return -EOPNOTSUPP; + return cap_inode_getsecurity(inode, name, buffer, alloc); } static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) -- cgit v1.2.3 From 507b460f814458605c47b0ed03c11e49a712fc08 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:32 -0800 Subject: PCI/ERR: Add pcie_link_rcec() to associate RCiEPs A Root Complex Event Collector terminates error and PME messages from associated RCiEPs. Use the RCEC Endpoint Association Extended Capability to identify associated RCiEPs. Link the associated RCiEPs as the RCECs are enumerated. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-12-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f8c927fd0602..7c7d2d23e8a3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -330,6 +330,7 @@ struct pci_dev { #endif #ifdef CONFIG_PCIEPORTBUS struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ + struct pci_dev *rcec; /* Associated RCEC device */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit v1.2.3 From 601c10c89cbb32b9123d8716d193e6d1a8e5300d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Oct 2020 11:13:56 +0300 Subject: net/mlx5: Delete custom device management logic After conversion to use auxiliary bus, all custom device management is not needed anymore, delete it. Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e31c72693bcf..54299305a2c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1076,28 +1076,6 @@ enum { MAX_MR_CACHE_ENTRIES }; -enum { - MLX5_INTERFACE_PROTOCOL_ETH_REP, - MLX5_INTERFACE_PROTOCOL_ETH, - - MLX5_INTERFACE_PROTOCOL_IB_REP, - MLX5_INTERFACE_PROTOCOL_MPIB, - MLX5_INTERFACE_PROTOCOL_IB, - - MLX5_INTERFACE_PROTOCOL_VDPA, -}; - -struct mlx5_interface { - void * (*add)(struct mlx5_core_dev *dev); - void (*remove)(struct mlx5_core_dev *dev, void *context); - int (*attach)(struct mlx5_core_dev *dev, void *context); - void (*detach)(struct mlx5_core_dev *dev, void *context); - int protocol; - struct list_head list; -}; - -int mlx5_register_interface(struct mlx5_interface *intf); -void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); -- cgit v1.2.3 From e87114022e1de734de0552e6b4f2dc5309efa27a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 10 Oct 2020 11:57:26 +0300 Subject: net/mlx5: Simplify eswitch mode check Provide mlx5_core device instead of "priv" pointer while checking eswith mode. Reviewed-by: Roi Dayan Signed-off-by: Leon Romanovsky --- include/linux/mlx5/eswitch.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index b0ae8020f13e..29fd832950e0 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -96,10 +96,10 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); -u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); +u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) +static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) { return MLX5_ESWITCH_NONE; } @@ -136,4 +136,8 @@ mlx5_eswitch_get_vport_metadata_mask(void) } #endif /* CONFIG_MLX5_ESWITCH */ +static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) +{ + return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; +} #endif -- cgit v1.2.3 From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 5 Dec 2020 22:14:51 -0800 Subject: mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING While I was doing zram testing, I found sometimes decompression failed since the compression buffer was corrupted. With investigation, I found below commit calls cond_resched unconditionally so it could make a problem in atomic context if the task is reschedule. BUG: sleeping function called from invalid context at mm/vmalloc.c:108 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog 3 locks held by memhog/946: #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160 #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160 #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0 CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 Call Trace: unmap_kernel_range_noflush+0x2eb/0x350 unmap_kernel_range+0x14/0x30 zs_unmap_object+0xd5/0xe0 zram_bvec_rw.isra.0+0x38c/0x8e0 zram_rw_page+0x90/0x101 bdev_write_page+0x92/0xe0 __swap_writepage+0x94/0x4a0 pageout+0xe3/0x3a0 shrink_page_list+0xb94/0xd60 shrink_inactive_list+0x158/0x460 We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which contains the offending calling code) from zsmalloc. Even though this option showed some amount improvement(e.g., 30%) in some arm32 platforms, it has been headache to maintain since it have abused APIs[1](e.g., unmap_kernel_range in atomic context). Since we are approaching to deprecate 32bit machines and already made the config option available for only builtin build since v5.8, lastly it has been not default option in zsmalloc, it's time to drop the option for better maintenance. [1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range") Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Reviewed-by: Sergey Senozhatsky Cc: Tony Lindgren Cc: Christoph Hellwig Cc: Harish Sriram Cc: Uladzislau Rezki Cc: Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com Signed-off-by: Linus Torvalds --- include/linux/zsmalloc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 0fdbf653b173..4807ca4d52e0 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -20,7 +20,6 @@ * zsmalloc mapping modes * * NOTE: These only make a difference when a mapped object spans pages. - * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected. */ enum zs_mapmode { ZS_MM_RW, /* normal read-write mapping */ -- cgit v1.2.3 From 85261c1ff156eb60fc26c378748387f2e85c6878 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Mon, 16 Nov 2020 14:56:11 +0200 Subject: mei: bus: add vtag support Add API to support vtag in communication on mei bus. Add mei_cldev_send_vtag, mei_cldev_recv_vtag and mei_cldev_recv_nonblock_vtag functions to allow sending a message with vtag set and to receive vtag of an incoming message. Cc: Sean Z Huang Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20201116125612.1660971-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 52aa4821093a..959ad7d850b4 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -95,6 +95,12 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 vtag); +ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 *vtag); +ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, + size_t length, u8 *vtag); int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, -- cgit v1.2.3 From 76437b340b242fd21952f54ba8965d21a1ffa8c8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 7 Dec 2020 10:16:01 +0100 Subject: earlycon: drop semicolon from earlycon macro Drop the trailing semicolon from the OF_EARLYCON_DECLARE() macro definition which was left when removing the array-of-pointer indirection. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201207091601.5202-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3e32b788c28d..e1b684e33841 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -372,7 +372,7 @@ extern const struct earlycon_id __earlycon_table_end[]; __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ - .setup = fn }; + .setup = fn } #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) -- cgit v1.2.3 From 2d26c716fc49f41a63e1efe8f1f772b0adeaacef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 7 Dec 2020 10:13:08 +0100 Subject: module: drop semicolon from version macro Drop the trailing semicolon from the MODULE_VERSION() macro definition which was left when removing the array-of-pointer indirection. Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 5958075ea3f4..e7a619c2457e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -279,7 +279,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \ }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ - }; + } #endif /* Optional firmware file (or files) needed by the module -- cgit v1.2.3 From c69942bda5152d764ee7d897d1627d64c7177ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Mon, 30 Nov 2020 16:24:15 +0100 Subject: mtd: spi-nor: Fix multiple typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few typos in comments in the SPI NOR framework; fix them. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Vignesh Raghavendra Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201130152416.1283972-1-j.neuschaefer@gmx.net --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 299685d15dc2..d13958de6d8a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -433,7 +433,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor) * @name: the chip type name * @hwcaps: the hardware capabilities supported by the controller driver * - * The drivers can use this fuction to scan the SPI NOR. + * The drivers can use this function to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to * fill the mtd_info{} and the spi_nor{}. * -- cgit v1.2.3 From 0eba770790426553f45b8643bcd77b854e045057 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 5 Nov 2020 20:27:45 +0100 Subject: clk: composite: add devm_clk_hw_register_composite_pdata() This will simplify drivers which would only unregister the clk in their remove() op. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201105192746.19564-3-michael@walle.cc Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 03a5de5f99f4..33db52ff83a0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1062,6 +1062,13 @@ struct clk_hw *clk_hw_register_composite_pdata(struct device *dev, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +struct clk_hw *devm_clk_hw_register_composite_pdata(struct device *dev, + const char *name, const struct clk_parent_data *parent_data, + int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags); void clk_hw_unregister_composite(struct clk_hw *hw); struct clk *clk_register(struct device *dev, struct clk_hw *hw); -- cgit v1.2.3 From d9a9280a0d0ae51dc1d4142138b99242b7ec8ac6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:10:58 +0100 Subject: seq_buf: Avoid type mismatch for seq_buf_init Building with W=2 prints a number of warnings for one function that has a pointer type mismatch: linux/seq_buf.h: In function 'seq_buf_init': linux/seq_buf.h:35:12: warning: pointer targets in assignment from 'unsigned char *' to 'char *' differ in signedness [-Wpointer-sign] Change the type in the function prototype according to the type in the structure. Link: https://lkml.kernel.org/r/20201026161108.3707783-1-arnd@kernel.org Fixes: 9a7777935c34 ("tracing: Convert seq_buf fields to be like seq_file fields") Reviewed-by: Cezary Rojewski Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- include/linux/seq_buf.h | 2 +- include/linux/trace_seq.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fb0205d87d3c..9d6c28cc4d8f 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -30,7 +30,7 @@ static inline void seq_buf_clear(struct seq_buf *s) } static inline void -seq_buf_init(struct seq_buf *s, unsigned char *buf, unsigned int size) +seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) { s->buffer = buf; s->size = size; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6c30508fca19..5a2c650d9e1c 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -12,7 +12,7 @@ */ struct trace_seq { - unsigned char buffer[PAGE_SIZE]; + char buffer[PAGE_SIZE]; struct seq_buf seq; int full; }; @@ -51,7 +51,7 @@ static inline int trace_seq_used(struct trace_seq *s) * that is about to be written to and then return the result * of that write. */ -static inline unsigned char * +static inline char * trace_seq_buffer_ptr(struct trace_seq *s) { return s->buffer + seq_buf_used(&s->seq); -- cgit v1.2.3 From 661d4f55a79483aee4970a76e3bd9d4cdc74ac79 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 22 Nov 2020 15:35:46 +0000 Subject: sbitmap: remove swap_lock map->swap_lock protects map->cleared from concurrent modification, however sbitmap_deferred_clear() is already atomically drains it, so it's guaranteed to not loose bits on concurrent sbitmap_deferred_clear(). A one threaded tag heavy test on top of nullbk showed ~1.5% t-put increase, and 3% -> 1% cycle reduction of sbitmap_get() according to perf. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index e40d019c3d9d..74cc6384715e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -32,11 +32,6 @@ struct sbitmap_word { * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; - - /** - * @swap_lock: Held while swapping word <-> cleared - */ - spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** -- cgit v1.2.3 From 26792699fe3681102aa85f4ae6d39e80a6a7e6b6 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 8 Nov 2020 19:51:09 +0100 Subject: clk: divider: add devm_clk_hw_register_divider_table() This will simplify drivers which would only unregister the clk in their remove() op. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201108185113.31377-6-michael@walle.cc Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 33db52ff83a0..5f896df01f83 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -639,6 +639,12 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, const struct clk_parent_data *parent_data, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, + struct device_node *np, const char *name, + const char *parent_name, const struct clk_hw *parent_hw, + const struct clk_parent_data *parent_data, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -779,6 +785,27 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, (parent_data), (flags), (reg), (shift), \ (width), (clk_divider_flags), (table), \ (lock)) +/** + * devm_clk_hw_register_divider_table - register a table based divider clock + * with the clock framework (devres variant) + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @table: array of divider/value pairs ending with a div set to 0 + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_divider_table(dev, name, parent_name, flags, \ + reg, shift, width, \ + clk_divider_flags, table, lock) \ + __devm_clk_hw_register_divider((dev), NULL, (name), (parent_name), \ + NULL, NULL, (flags), (reg), (shift), \ + (width), (clk_divider_flags), (table), \ + (lock)) void clk_unregister_divider(struct clk *clk); void clk_hw_unregister_divider(struct clk_hw *hw); -- cgit v1.2.3 From 374a96b9600ccf60083c0fec8f727e04752a7f0c Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 6 Dec 2020 11:12:54 +0200 Subject: net/mlx4: Remove unused #define MAX_MSIX_P_PORT All usages of the definition MAX_MSIX_P_PORT were removed. It's not in use anymore. Remove it. Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Link: https://lore.kernel.org/r/20201206091254.12476-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx4/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 06e066e04a4b..236a7d04f891 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -46,7 +46,6 @@ #define DEFAULT_UAR_PAGE_SHIFT 12 -#define MAX_MSIX_P_PORT 17 #define MAX_MSIX 128 #define MIN_MSIX_P_PORT 5 #define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \ -- cgit v1.2.3 From fb01a2932e81a1fb2273f87ff92dc8172b8880ee Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 3 Dec 2020 09:26:36 +0800 Subject: blk-mq: add new API of blk_mq_hctx_set_fq_lock_class flush_end_io() may be called recursively from some driver, such as nvme-loop, so lockdep may complain 'possible recursive locking'. Commit b3c6a5997541("block: Fix a lockdep complaint triggered by request queue flushing") tried to address this issue by assigning dynamically allocated per-flush-queue lock class. This solution adds synchronize_rcu() for each hctx's release handler, and causes horrible SCSI MQ probe delay(more than half an hour on megaraid sas). Add new API of blk_mq_hctx_set_fq_lock_class() for these drivers, so we just need to use driver specific lock class for avoiding the lockdep warning of 'possible recursive locking'. Tested-by: Kashyap Desai Reported-by: Qian Cai Cc: Sumit Saxena Cc: John Garry Cc: Kashyap Desai Cc: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..5f639240760e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -594,5 +595,7 @@ static inline void blk_mq_cleanup_rq(struct request *rq) } blk_qc_t blk_mq_submit_bio(struct bio *bio); +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key); #endif -- cgit v1.2.3 From 76ea4d8eeefbfdd37e47c6fd579d0d5852457618 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 24 Nov 2020 10:43:45 +0000 Subject: firmware: arm_scmi: Add power_scale_mw_get() interface Add a new interface to the existing perf_ops and export the information about the power values scale. This would be used by the cpufreq driver and Energy Model framework to set the performance domains scale: milli-Watts or abstract scale. Suggested-by: Morten Rasmussen Reviewed-by: Cristian Marussi Signed-off-by: Lukasz Luba Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- include/linux/scmi_protocol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9cd312a1ff92..c77e4e11e788 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -121,6 +121,7 @@ struct scmi_perf_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_handle *handle, struct device *dev); + bool (*power_scale_mw_get)(const struct scmi_handle *handle); }; /** -- cgit v1.2.3 From cc00bcaa589914096edef7fb87ca5cee4a166b5c Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 25 Nov 2020 11:27:22 -0700 Subject: netfilter: x_tables: Switch synchronization to RCU When running concurrent iptables rules replacement with data, the per CPU sequence count is checked after the assignment of the new information. The sequence count is used to synchronize with the packet path without the use of any explicit locking. If there are any packets in the packet path using the table information, the sequence count is incremented to an odd value and is incremented to an even after the packet process completion. The new table value assignment is followed by a write memory barrier so every CPU should see the latest value. If the packet path has started with the old table information, the sequence counter will be odd and the iptables replacement will wait till the sequence count is even prior to freeing the old table info. However, this assumes that the new table information assignment and the memory barrier is actually executed prior to the counter check in the replacement thread. If CPU decides to execute the assignment later as there is no user of the table information prior to the sequence check, the packet path in another CPU may use the old table information. The replacement thread would then free the table information under it leading to a use after free in the packet processing context- Unable to handle kernel NULL pointer dereference at virtual address 000000000000008e pc : ip6t_do_table+0x5d0/0x89c lr : ip6t_do_table+0x5b8/0x89c ip6t_do_table+0x5d0/0x89c ip6table_filter_hook+0x24/0x30 nf_hook_slow+0x84/0x120 ip6_input+0x74/0xe0 ip6_rcv_finish+0x7c/0x128 ipv6_rcv+0xac/0xe4 __netif_receive_skb+0x84/0x17c process_backlog+0x15c/0x1b8 napi_poll+0x88/0x284 net_rx_action+0xbc/0x23c __do_softirq+0x20c/0x48c This could be fixed by forcing instruction order after the new table information assignment or by switching to RCU for the synchronization. Fixes: 80055dab5de0 ("netfilter: x_tables: make xt_replace_table wait until old rules are not used anymore") Reported-by: Sean Tranchetti Reported-by: kernel test robot Suggested-by: Florian Westphal Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ebb64193757 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info *private; + struct xt_table_info __rcu *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,6 +448,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table); + #ifdef CONFIG_COMPAT #include -- cgit v1.2.3 From 2f24dfb71208eeb0174f08dd56ca6d3c17b279a5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 02:34:50 +0800 Subject: iommu: Delete split_and_remove_iova() Function split_and_remove_iova() has not been referenced since commit e70b081c6f37 ("iommu/vt-d: Remove IOVA handling code from the non-dma_ops path"), so delete it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1607020492-189471-2-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- include/linux/iova.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index a0637abffee8..a77add571c50 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -160,8 +160,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); -struct iova *split_and_remove_iova(struct iova_domain *iovad, - struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #else static inline int iova_cache_get(void) @@ -258,14 +256,6 @@ static inline void put_iova_domain(struct iova_domain *iovad) { } -static inline struct iova *split_and_remove_iova(struct iova_domain *iovad, - struct iova *iova, - unsigned long pfn_lo, - unsigned long pfn_hi) -{ - return NULL; -} - static inline void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) { -- cgit v1.2.3 From 51b70b817b187e48155fc492adb9ce80bdb21b70 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 02:34:51 +0800 Subject: iommu: Stop exporting alloc_iova_mem() It is not used outside iova.c Signed-off-by: John Garry Link: https://lore.kernel.org/r/1607020492-189471-3-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- include/linux/iova.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index a77add571c50..d8c011b6d4ed 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -136,7 +136,6 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); -struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); @@ -171,11 +170,6 @@ static inline void iova_cache_put(void) { } -static inline struct iova *alloc_iova_mem(void) -{ - return NULL; -} - static inline void free_iova_mem(struct iova *iova) { } -- cgit v1.2.3 From 176cfc187c24287a363e7612cd2385ba40c2042b Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 02:34:52 +0800 Subject: iommu: Stop exporting free_iova_mem() It has no user outside iova.c Signed-off-by: John Garry Link: https://lore.kernel.org/r/1607020492-189471-4-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- include/linux/iova.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index d8c011b6d4ed..76e16ae20729 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -136,7 +136,6 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); -void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, @@ -170,10 +169,6 @@ static inline void iova_cache_put(void) { } -static inline void free_iova_mem(struct iova *iova) -{ -} - static inline void free_iova(struct iova_domain *iovad, unsigned long pfn) { } -- cgit v1.2.3 From fefe8527a1e0e0014946c6b5b3b2e40cb32bb5d3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 25 Nov 2020 17:29:39 +0000 Subject: iommu/io-pgtable: Remove tlb_flush_leaf The only user of tlb_flush_leaf is a particularly hairy corner of the Arm short-descriptor code, which wants a synchronous invalidation to minimise the races inherent in trying to split a large page mapping. This is already far enough into "here be dragons" territory that no sensible caller should ever hit it, and thus it really doesn't need optimising. Although using tlb_flush_walk there may technically be more heavyweight than needed, it does the job and saves everyone else having to carry around useless baggage. Signed-off-by: Robin Murphy Reviewed-by: Steven Price Link: https://lore.kernel.org/r/9844ab0c5cb3da8b2f89c6c2da16941910702b41.1606324115.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index fb4d5a763e0c..ea727eb1a1a9 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -25,8 +25,6 @@ enum io_pgtable_fmt { * @tlb_flush_walk: Synchronously invalidate all intermediate TLB state * (sometimes referred to as the "walk cache") for a virtual * address range. - * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual - * address range. * @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a * single page. IOMMUs that cannot batch TLB invalidation * operations efficiently will typically issue them here, but @@ -40,8 +38,6 @@ struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule, void *cookie); - void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, - void *cookie); void (*tlb_add_page)(struct iommu_iotlb_gather *gather, unsigned long iova, size_t granule, void *cookie); }; @@ -228,13 +224,6 @@ io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); } -static inline void -io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, - size_t size, size_t granule) -{ - iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie); -} - static inline void io_pgtable_tlb_add_page(struct io_pgtable *iop, struct iommu_iotlb_gather * gather, unsigned long iova, -- cgit v1.2.3 From 1080399542075bb0e9d46ea80418d76784d1ece8 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Sat, 28 Nov 2020 07:09:23 +0530 Subject: PM / EM: Micro optimization in em_cpu_energy When the sum of the utilization of CPUs in a power domain is zero, return the energy as 0 without doing any computations. Acked-by: Quentin Perret Reviewed-by: Dietmar Eggemann Signed-off-by: Pavankumar Kondeti Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 9618c0a46ef4..757fc60658fa 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -106,6 +106,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i, cpu; + if (!sum_util) + return 0; + /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested -- cgit v1.2.3 From b577562ccc072ab4b09243740ebeca52309eecd2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 8 Dec 2020 13:16:22 +0100 Subject: PCI: Remove unused HAVE_PCI_SET_MWI Remove unused HAVE_PCI_SET_MWI. Link: https://lore.kernel.org/r/03f20cac-708d-7897-c7c7-cb4e63cfd991@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..fe824afc0e91 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1190,7 +1190,6 @@ void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); int pci_set_cacheline_size(struct pci_dev *dev); -#define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); -- cgit v1.2.3 From f119cc9818eb33b66e977ad3af75aef6500bbdc3 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:41 +0800 Subject: net: stmmac: overwrite the dma_cap.addr64 according to HW design The current IP register MAC_HW_Feature1[ADDR64] only defines 32/40/64 bit width, but some SOCs support others like i.MX8MP support 34 bits but it maps to 40 bits width in MAC_HW_Feature1[ADDR64]. So overwrite dma_cap.addr64 according to HW real design. Fixes: 94abdad6974a ("net: ethernet: dwmac: add ethernet glue logic for NXP imx8 chip") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 628e28903b8b..15ca6b4167cc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -170,6 +170,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; + u32 addr64; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; -- cgit v1.2.3 From b60da4955f53d1f50e44351a9c3a37a92503079e Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 8 Dec 2020 18:36:23 +0100 Subject: bpf: Only provide bpf_sock_from_file with CONFIG_NET This moves the bpf_sock_from_file definition into net/core/filter.c which only gets compiled with CONFIG_NET and also moves the helper proto usage next to other tracing helpers that are conditional on CONFIG_NET. This avoids ld: kernel/trace/bpf_trace.o: in function `bpf_sock_from_file': bpf_trace.c:(.text+0xe23): undefined reference to `sock_from_file' When compiling a kernel with BPF and without NET. Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: Randy Dunlap Acked-by: Martin KaFai Lau Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20201208173623.1136863-1-revest@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d05e75ed8c1b..07cb5d15e743 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1859,6 +1859,7 @@ extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; +extern const struct bpf_func_proto bpf_sock_from_file_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From e77dcb0b732dd355ca594909f6c2085dfc46cde2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 10:37:16 +0530 Subject: opp: Don't create an OPP table from dev_pm_opp_get_opp_table() It has been found that some users (like cpufreq-dt and others on LKML) have abused the helper dev_pm_opp_get_opp_table() to create the OPP table instead of just finding it, which is the wrong thing to do. This routine was meant for OPP core's internal working and exposed the whole functionality by mistake. Change the scope of dev_pm_opp_get_opp_table() to only finding the table. The internal helpers _opp_get_opp_table*() are thus renamed to _add_opp_table*(), dev_pm_opp_get_opp_table_indexed() is removed (as we don't need the index field for finding the OPP table) and so the only user, genpd, is updated. Note that the prototype of _add_opp_table() was already left in opp.h by mistake when it was removed earlier and so we weren't required to add it now. Acked-by: Ulf Hansson Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dbb484524f82..1435c054016a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -90,7 +90,6 @@ struct dev_pm_set_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); -struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); -- cgit v1.2.3 From e4a9378083c57a22624cda8b780ff5f5da4de7ef Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 1 Dec 2020 19:17:33 -0800 Subject: usb: typec: tcpm: Pass down negotiated rev to update retry count nRetryCount was updated from 3 to 2 between PD2.0 and PD3.0 spec. nRetryCount in "Table 6-34 Counter parameters" of the PD 2.0 spec is set to 3, whereas, nRetryCount in "Table 6-59 Counter parameters" is set to 2. Pass down negotiated rev in pd_transmit so that low level chip drivers can update the retry count accordingly before attempting packet transmission. This helps in passing "TEST.PD.PORT.ALL.02" of the "Power Delivery Merged" test suite which was initially failing with "The UUT did not retransmit the message nReryCount times" In fusb302 & tcpci drivers, by default the driver sets the retry count to 3 (Default for PD 2.0). Update this to 2, if the negotiated rev is PD 3.0. In wcove, since the retry count is intentionally set to max, leaving it as is. Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20201202031733.647808-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index e68aaa12886f..efaedd7e8a18 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -121,7 +121,7 @@ struct tcpc_dev { enum typec_cc_status cc); int (*try_role)(struct tcpc_dev *dev, int role); int (*pd_transmit)(struct tcpc_dev *dev, enum tcpm_transmit_type type, - const struct pd_message *msg); + const struct pd_message *msg, unsigned int negotiated_rev); int (*set_bist_data)(struct tcpc_dev *dev, bool on); int (*enable_frs)(struct tcpc_dev *dev, bool enable); void (*frs_sourcing_vbus)(struct tcpc_dev *dev); -- cgit v1.2.3 From 28b43d3d746b89fc112fe681e018b39b43495dad Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 1 Dec 2020 20:08:38 -0800 Subject: usb: typec: tcpm: Introduce vsafe0v for vbus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCPM at present lacks the notion of VSAFE0V. There are three vbus threshold levels that are critical to track: a. vSafe5V - VBUS “5 volts” as defined by the USB PD specification. b. vSinkDisconnect - Threshold used for transition from Attached.SNK to Unattached.SNK. c. vSafe0V - VBUS “0 volts” as defined by the USB PD specification. Tracking vSafe0V is crucial for entry into Try.SNK and Attached.SRC and turning vbus back on by the source in response to hard reset. >From "4.5.2.2.8.2 Exiting from AttachWait.SRC State" section in the Type-C spec: "The port shall transition to Attached.SRC when VBUS is at vSafe0V and the SRC.Rd state is detected on exactly one of the CC1 or CC2 pins for at least tCCDebounce." "A DRP that strongly prefers the Sink role may optionally transition to Try.SNK instead of Attached.SRC when VBUS is at vSafe0V and the SRC.Rd state is detected on exactly one of the CC1 or CC2 pins for at least tCCDebounce." >From "7.1.5 Response to Hard Resets" section in the PD spec: "After establishing the vSafe0V voltage condition on VBUS, the Source Shall wait tSrcRecover before re-applying VCONN and restoring VBUS to vSafe5V." vbus_present in the TCPM code tracks vSafe5V(vbus_present is true) and vSinkDisconnect(vbus_present is false). This change adds is_vbus_vsafe0v callback which when set makes TCPM query for vSafe0V voltage level when needed. Since not all TCPC controllers might have the capability to report vSafe0V, TCPM assumes that vSafe0V is same as vSinkDisconnect when is_vbus_vsafe0v callback is not set. This allows TCPM to continue to support controllers which don't have the support for reporting vSafe0V. Introducing vSafe0V helps fix the failure reported at "Step 15. CVS verifies PUT remains in AttachWait.SRC for 500ms" of "TD 4.7.2 Try. SNK DRP Connect DRP Test" of "Universal Serial Bus Type-C (USB Type-C) Functional Test Specification Chapters 4 and 5". Here the compliance tester intentionally maintains vbus at greater than vSafe0V and expects the Product under test to stay in AttachWait.SRC till vbus drops to vSafe0V. Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20201202040840.663578-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index efaedd7e8a18..f4a18427f5c4 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -98,6 +98,12 @@ enum tcpm_transmit_type { * will be turned on. requested_vbus_voltage is set to 0 when vbus * is going to disappear knowingly i.e. during PR_SWAP and * HARD_RESET etc. + * @is_vbus_vsafe0v: + * Optional; TCPCI spec based TCPC implementations are expected to + * detect VSAFE0V voltage level at vbus. When detection of VSAFE0V + * is supported by TCPC, set this callback for TCPM to query + * whether vbus is at VSAFE0V when needed. + * Returns true when vbus is at VSAFE0V, false otherwise. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -128,6 +134,7 @@ struct tcpc_dev { int (*enable_auto_vbus_discharge)(struct tcpc_dev *dev, bool enable); int (*set_auto_vbus_discharge_threshold)(struct tcpc_dev *dev, enum typec_pwr_opmode mode, bool pps_active, u32 requested_vbus_voltage); + bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); }; struct tcpm_port; -- cgit v1.2.3 From af633212c4aac1dbd3a48615a834646ce072346d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Dec 2020 12:39:36 +0100 Subject: tty: use assign_bit() in port-flag accessors Use the new assign_bit() wrapper in the port-flag accessors instead of open coding. Suggested-by: Jiri Slaby Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201202113942.27024-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 0c2f97ba8018..9b8e7d5bf2fc 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -615,10 +615,7 @@ static inline bool tty_port_cts_enabled(struct tty_port *port) static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_CTS_FLOW, &port->iflags); - else - clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); + assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val); } static inline bool tty_port_active(struct tty_port *port) @@ -628,10 +625,7 @@ static inline bool tty_port_active(struct tty_port *port) static inline void tty_port_set_active(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_ACTIVE, &port->iflags); - else - clear_bit(TTY_PORT_ACTIVE, &port->iflags); + assign_bit(TTY_PORT_ACTIVE, &port->iflags, val); } static inline bool tty_port_check_carrier(struct tty_port *port) @@ -641,10 +635,7 @@ static inline bool tty_port_check_carrier(struct tty_port *port) static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_CHECK_CD, &port->iflags); - else - clear_bit(TTY_PORT_CHECK_CD, &port->iflags); + assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val); } static inline bool tty_port_suspended(struct tty_port *port) @@ -654,10 +645,7 @@ static inline bool tty_port_suspended(struct tty_port *port) static inline void tty_port_set_suspended(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_SUSPENDED, &port->iflags); - else - clear_bit(TTY_PORT_SUSPENDED, &port->iflags); + assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val); } static inline bool tty_port_initialized(struct tty_port *port) @@ -667,10 +655,7 @@ static inline bool tty_port_initialized(struct tty_port *port) static inline void tty_port_set_initialized(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_INITIALIZED, &port->iflags); - else - clear_bit(TTY_PORT_INITIALIZED, &port->iflags); + assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val); } static inline bool tty_port_kopened(struct tty_port *port) @@ -680,10 +665,7 @@ static inline bool tty_port_kopened(struct tty_port *port) static inline void tty_port_set_kopened(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_KOPENED, &port->iflags); - else - clear_bit(TTY_PORT_KOPENED, &port->iflags); + assign_bit(TTY_PORT_KOPENED, &port->iflags, val); } extern struct tty_struct *tty_port_tty_get(struct tty_port *port); -- cgit v1.2.3 From 9e1792727ead477f49958578d0dbd466a7deea48 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Dec 2020 12:39:37 +0100 Subject: tty: use const parameters in port-flag accessors Declare the port parameter to the flag-test accessors as const. This is currently mostly cosmetic as the accessors are already inlined. Suggested-by: Jiri Slaby Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201202113942.27024-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 9b8e7d5bf2fc..c873f475f0a7 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -608,7 +608,7 @@ static inline struct tty_port *tty_port_get(struct tty_port *port) } /* If the cts flow control is enabled, return true. */ -static inline bool tty_port_cts_enabled(struct tty_port *port) +static inline bool tty_port_cts_enabled(const struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } @@ -618,7 +618,7 @@ static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val); } -static inline bool tty_port_active(struct tty_port *port) +static inline bool tty_port_active(const struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } @@ -628,7 +628,7 @@ static inline void tty_port_set_active(struct tty_port *port, bool val) assign_bit(TTY_PORT_ACTIVE, &port->iflags, val); } -static inline bool tty_port_check_carrier(struct tty_port *port) +static inline bool tty_port_check_carrier(const struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } @@ -638,7 +638,7 @@ static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val); } -static inline bool tty_port_suspended(struct tty_port *port) +static inline bool tty_port_suspended(const struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } @@ -648,7 +648,7 @@ static inline void tty_port_set_suspended(struct tty_port *port, bool val) assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val); } -static inline bool tty_port_initialized(struct tty_port *port) +static inline bool tty_port_initialized(const struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } @@ -658,7 +658,7 @@ static inline void tty_port_set_initialized(struct tty_port *port, bool val) assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val); } -static inline bool tty_port_kopened(struct tty_port *port) +static inline bool tty_port_kopened(const struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } -- cgit v1.2.3 From 4b03d99794eeed27650597a886247c6427ce1055 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:16 -0500 Subject: nfsd: minor nfsd4_change_attribute cleanup Minor cleanup, no change in behavior. Also pull out a common helper that'll be useful elsewhere. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- include/linux/iversion.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 2917ef990d43..3bfebde5a1a6 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -328,6 +328,19 @@ inode_query_iversion(struct inode *inode) return cur >> I_VERSION_QUERIED_SHIFT; } +/* + * For filesystems without any sort of change attribute, the best we can + * do is fake one up from the ctime: + */ +static inline u64 time_to_chattr(struct timespec64 *t) +{ + u64 chattr = t->tv_sec; + + chattr <<= 32; + chattr += t->tv_nsec; + return chattr; +} + /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check -- cgit v1.2.3 From 1631087ba8727db03c0ab2815dc06dc25d962b80 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:18 -0500 Subject: Revert "nfsd4: support change_attr_type attribute" This reverts commit a85857633b04d57f4524cca0a2bfaf87b2543f9f. We're still factoring ctime into our change attribute even in the IS_I_VERSION case. If someone sets the system time backwards, a client could see the change attribute go backwards. Maybe we can just say "well, don't do that", but there's some question whether that's good enough, or whether we need a better guarantee. Also, the client still isn't actually using the attribute. While we're still figuring this out, let's just stop returning this attribute. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- include/linux/nfs4.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9dc7eeac924f..5b4c67c91f56 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -385,13 +385,6 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; -enum change_attr_type4 { - NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, - NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, - NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, - NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, - NFS4_CHANGE_TYPE_IS_UNDEFINED = 4 -}; /* Mandatory Attributes */ #define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) @@ -459,7 +452,6 @@ enum change_attr_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) -#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) -- cgit v1.2.3 From daab110e47f8d7aa6da66923e3ac1a8dbd2b2a72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:14 -0500 Subject: nfsd: add a new EXPORT_OP_NOWCC flag to struct export_operations With NFSv3 nfsd will always attempt to send along WCC data to the client. This generally involves saving off the in-core inode information prior to doing the operation on the given filehandle, and then issuing a vfs_getattr to it after the op. Some filesystems (particularly clustered or networked ones) have an expensive ->getattr inode operation. Atomicity is also often difficult or impossible to guarantee on such filesystems. For those, we're best off not trying to provide WCC information to the client at all, and to simply allow it to poll for that information as needed with a GETATTR RPC. This patch adds a new flags field to struct export_operations, and defines a new EXPORT_OP_NOWCC flag that filesystems can use to indicate that nfsd should not attempt to provide WCC info in NFSv3 replies. It also adds a blurb about the new flags field and flag to the exporting documentation. The server will also now skip collecting this information for NFSv2 as well, since that info is never used there anyway. Note that this patch does not add this flag to any filesystem export_operations structures. This was originally developed to allow reexporting nfs via nfsd. Other filesystems may want to consider enabling this flag too. It's hard to tell however which ones have export operations to enable export via knfsd and which ones mostly rely on them for open-by-filehandle support, so I'm leaving that up to the individual maintainers to decide. I am cc'ing the relevant lists for those filesystems that I think may want to consider adding this though. Cc: HPDD-discuss@lists.01.org Cc: ceph-devel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: fuse-devel@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3ceb72b67a7a..e7de0103a32e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,6 +213,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); +#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ + unsigned long flags; }; extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, -- cgit v1.2.3 From ba5e8187c55555519ae0b63c0fb681391bc42af9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:15 -0500 Subject: nfsd: allow filesystems to opt out of subtree checking When we start allowing NFS to be reexported, then we have some problems when it comes to subtree checking. In principle, we could allow it, but it would mean encoding parent info in the filehandles and there may not be enough space for that in a NFSv3 filehandle. To enforce this at export upcall time, we add a new export_ops flag that declares the filesystem ineligible for subtree checking. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index e7de0103a32e..2fcbab0f6b61 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -214,6 +214,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); #define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ unsigned long flags; }; -- cgit v1.2.3 From 7f84b488f9add1d5cca3e6197c95914c7bd3c1cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:16 -0500 Subject: nfsd: close cached files prior to a REMOVE or RENAME that would replace target It's not uncommon for some workloads to do a bunch of I/O to a file and delete it just afterward. If knfsd has a cached open file however, then the file may still be open when the dentry is unlinked. If the underlying filesystem is nfs, then that could trigger it to do a sillyrename. On a REMOVE or RENAME scan the nfsd_file cache for open files that correspond to the inode, and proactively unhash and put their references. This should prevent any delete-on-last-close activity from occurring, solely due to knfsd's open file cache. This must be done synchronously though so we use the variants that call flush_delayed_fput. There are deadlock possibilities if you call flush_delayed_fput while holding locks, however. In the case of nfsd_rename, we don't even do the lookups of the dentries to be renamed until we've locked for rename. Once we've figured out what the target dentry is for a rename, check to see whether there are cached open files associated with it. If there are, then unwind all of the locking, close them all, and then reattempt the rename. None of this is really necessary for "typical" filesystems though. It's mostly of use for NFS, so declare a new export op flag and use that to determine whether to close the files beforehand. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 2fcbab0f6b61..d829403ffd3b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,8 +213,9 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); -#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ -#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ +#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ +#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ unsigned long flags; }; -- cgit v1.2.3 From d045465fc6cbfa4acfb5a7d817a7c1a57a078109 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:17 -0500 Subject: exportfs: Add a function to return the raw output from fh_to_dentry() In order to allow nfsd to accept return values that are not acceptable to overlayfs and others, add a new function. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d829403ffd3b..846df3c96730 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -223,6 +223,11 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable); +extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, + struct fid *fid, int fh_len, + int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context); -- cgit v1.2.3 From 01cbf3853959feec40ec9b9a399e12a021cd4d81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:19 -0500 Subject: nfsd: Set PF_LOCAL_THROTTLE on local filesystems only Don't set PF_LOCAL_THROTTLE on remote filesystems like NFS, since they aren't expected to ever be subject to double buffering. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 846df3c96730..d93e8a6737bb 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -216,6 +216,7 @@ struct export_operations { #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ +#define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ unsigned long flags; }; -- cgit v1.2.3 From 716a8bc7f706eeef80ab42c99d9f210eda845c81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 23:14:27 -0500 Subject: nfsd: Record NFSv4 pre/post-op attributes as non-atomic For the case of NFSv4, specify to the client that the pre/post-op attributes were not recorded atomically with the main operation. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d93e8a6737bb..9f4d4bcbf251 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -217,6 +217,9 @@ struct export_operations { #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ #define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ +#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply + atomic attribute updates + */ unsigned long flags; }; -- cgit v1.2.3 From 0f9368b5bf6db0c04afc5454b1be79022a681615 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2020 14:10:32 -0600 Subject: rwsem: Implement down_read_killable_nested In preparation for converting exec_update_mutex to a rwsem so that multiple readers can execute in parallel and not deadlock, add down_read_killable_nested. This is needed so that kcmp_lock can be converted from working on a mutexes to working on rw_semaphores. Signed-off-by: Eric W. Biederman Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/87o8jabqh3.fsf@x220.int.ebiederm.org --- include/linux/rwsem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 25e3fde85617..13021b08b2ed 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -171,6 +171,7 @@ extern void downgrade_write(struct rw_semaphore *sem); * See Documentation/locking/lockdep-design.rst for more details.) */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); +extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); @@ -191,6 +192,7 @@ extern void down_read_non_owner(struct rw_semaphore *sem); extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) +# define down_read_killable_nested(sem, subclass) down_read_killable(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) # define down_write_killable_nested(sem, subclass) down_write_killable(sem) -- cgit v1.2.3 From 31784cff7ee073b34d6eddabb95e3be2880a425c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2020 14:11:13 -0600 Subject: rwsem: Implement down_read_interruptible In preparation for converting exec_update_mutex to a rwsem so that multiple readers can execute in parallel and not deadlock, add down_read_interruptible. This is needed for perf_event_open to be converted (with no semantic changes) from working on a mutex to wroking on a rwsem. Signed-off-by: Eric W. Biederman Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/87k0tybqfy.fsf@x220.int.ebiederm.org --- include/linux/rwsem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 13021b08b2ed..4c715be48717 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -123,6 +123,7 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) * lock for reading */ extern void down_read(struct rw_semaphore *sem); +extern int __must_check down_read_interruptible(struct rw_semaphore *sem); extern int __must_check down_read_killable(struct rw_semaphore *sem); /* -- cgit v1.2.3 From 66bcfcdf89d00f2409f4b5da0f8c20c08318dc72 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sun, 6 Dec 2020 17:21:42 +0100 Subject: seqlock: Prefix internal seqcount_t-only macros with a "do_" When the seqcount_LOCKNAME_t group of data types were introduced, two classes of seqlock.h sequence counter macros were added: - An external public API which can either take a plain seqcount_t or any of the seqcount_LOCKNAME_t variants. - An internal API which takes only a plain seqcount_t. To distinguish between the two groups, the "*_seqcount_t_*" pattern was used for the latter. This confused a number of mm/ call-site developers, and Linus also commented that it was not a standard practice for marking seqlock.h internal APIs. Distinguish the latter group of macros by prefixing a "do_". Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com --- include/linux/seqlock.h | 66 ++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d89134c74fba..235cbc65fd71 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -425,9 +425,9 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - __read_seqcount_t_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_ptr(s), start) -static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) +static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); @@ -445,12 +445,12 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - read_seqcount_t_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_ptr(s), start) -static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) +static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return __read_seqcount_t_retry(s, start); + return do___read_seqcount_retry(s, start); } /** @@ -462,10 +462,10 @@ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - raw_write_seqcount_t_begin(seqprop_ptr(s)); \ + do_raw_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) -static inline void raw_write_seqcount_t_begin(seqcount_t *s) +static inline void do_raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -478,13 +478,13 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) */ #define raw_write_seqcount_end(s) \ do { \ - raw_write_seqcount_t_end(seqprop_ptr(s)); \ + do_raw_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) -static inline void raw_write_seqcount_t_end(seqcount_t *s) +static inline void do_raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; @@ -506,12 +506,12 @@ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin_nested(seqprop_ptr(s), subclass); \ + do_write_seqcount_begin_nested(seqprop_ptr(s), subclass); \ } while (0) -static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) +static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - raw_write_seqcount_t_begin(s); + do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } @@ -533,12 +533,12 @@ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin(seqprop_ptr(s)); \ + do_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) -static inline void write_seqcount_t_begin(seqcount_t *s) +static inline void do_write_seqcount_begin(seqcount_t *s) { - write_seqcount_t_begin_nested(s, 0); + do_write_seqcount_begin_nested(s, 0); } /** @@ -549,16 +549,16 @@ static inline void write_seqcount_t_begin(seqcount_t *s) */ #define write_seqcount_end(s) \ do { \ - write_seqcount_t_end(seqprop_ptr(s)); \ + do_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) -static inline void write_seqcount_t_end(seqcount_t *s) +static inline void do_write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_t_end(s); + do_raw_write_seqcount_end(s); } /** @@ -603,9 +603,9 @@ static inline void write_seqcount_t_end(seqcount_t *s) * } */ #define raw_write_seqcount_barrier(s) \ - raw_write_seqcount_t_barrier(seqprop_ptr(s)) + do_raw_write_seqcount_barrier(seqprop_ptr(s)) -static inline void raw_write_seqcount_t_barrier(seqcount_t *s) +static inline void do_raw_write_seqcount_barrier(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -623,9 +623,9 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ - write_seqcount_t_invalidate(seqprop_ptr(s)) + do_write_seqcount_invalidate(seqprop_ptr(s)) -static inline void write_seqcount_t_invalidate(seqcount_t *s) +static inline void do_write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); kcsan_nestable_atomic_begin(); @@ -865,9 +865,9 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use write_seqcount_*t*_begin() - * instead of the generic write_seqcount_begin(). This way, no redundant - * lockdep_assert_held() checks are added. + * For all seqlock_t write side functions, use the the internal + * do_write_seqcount_begin() instead of generic write_seqcount_begin(). + * This way, no redundant lockdep_assert_held() checks are added. */ /** @@ -886,7 +886,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); } /** @@ -898,7 +898,7 @@ static inline void write_seqlock(seqlock_t *sl) */ static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock(&sl->lock); } @@ -912,7 +912,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); } /** @@ -925,7 +925,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) */ static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_bh(&sl->lock); } @@ -939,7 +939,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); } /** @@ -951,7 +951,7 @@ static inline void write_seqlock_irq(seqlock_t *sl) */ static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irq(&sl->lock); } @@ -960,7 +960,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); return flags; } @@ -989,7 +989,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irqrestore(&sl->lock, flags); } -- cgit v1.2.3 From cb262935a166bdef0ccfe6e2adffa00c0f2d038a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sun, 6 Dec 2020 17:21:43 +0100 Subject: seqlock: kernel-doc: Specify when preemption is automatically altered The kernel-doc annotations for sequence counters write side functions are incomplete: they do not specify when preemption is automatically disabled and re-enabled. This has confused a number of call-site developers. Fix it. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com --- include/linux/seqlock.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 235cbc65fd71..2f7bb92b4c9e 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -456,6 +456,8 @@ static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * + * Context: check write_seqcount_begin() */ #define raw_write_seqcount_begin(s) \ do { \ @@ -475,6 +477,8 @@ static inline void do_raw_write_seqcount_begin(seqcount_t *s) /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * + * Context: check write_seqcount_end() */ #define raw_write_seqcount_end(s) \ do { \ @@ -498,6 +502,7 @@ static inline void do_raw_write_seqcount_end(seqcount_t *s) * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst + * Context: check write_seqcount_begin() */ #define write_seqcount_begin_nested(s, subclass) \ do { \ @@ -519,11 +524,10 @@ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) * write_seqcount_begin() - start a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * write_seqcount_begin opens a write side critical section of the given - * seqcount_t. - * - * Context: seqcount_t write side critical sections must be serialized and - * non-preemptible. If readers can be invoked from hardirq or softirq + * Context: sequence counter write side sections must be serialized and + * non-preemptible. Preemption will be automatically disabled if and + * only if the seqcount write serialization lock is associated, and + * preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ #define write_seqcount_begin(s) \ @@ -545,7 +549,8 @@ static inline void do_write_seqcount_begin(seqcount_t *s) * write_seqcount_end() - end a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * The write section must've been opened with write_seqcount_begin(). + * Context: Preemption will be automatically re-enabled if and only if + * the seqcount write serialization lock is associated, and preemptible. */ #define write_seqcount_end(s) \ do { \ -- cgit v1.2.3 From c95d64012ad7de2747923b0caf80e195e940606c Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:16 -0800 Subject: Revert "driver core: Avoid deferred probe due to fw_devlink_pause/resume()" This reverts commit 2451e746478a6a6e981cfa66b62b791ca93b90c8. fw_devlink_pause/resume() was an incomplete attempt at boot time optimization. That's going to get replaced by a much better optimization at the end of the series. Since fw_devlink_pause/resume() is going away, changes made for that can also go away. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 5ed101be7b2e..da00f8e449bb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -352,8 +352,7 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_hook: Hook to global list of devices that have deferred sync_state or - * deferred fw_devlink. + * @defer_hook: Hook to global list of devices that have deferred sync_state. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. -- cgit v1.2.3 From 3b052a3e30f2eb92dcae9fd89af48d5a13045737 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:17 -0800 Subject: Revert "driver core: Rename dev_links_info.defer_sync to defer_hook" This reverts commit ec7bd78498f29680f536451fbdf9464e851273ed. This field rename was done to reuse defer_syc list head for multiple lists. That's not needed anymore and this list head will only be used for defer sync. So revert this patch to avoid conflicts with the other reverts coming after this. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index da00f8e449bb..1e771ea4dca6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -352,7 +352,7 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_hook: Hook to global list of devices that have deferred sync_state. + * @defer_sync: Hook to global list of devices that have deferred sync_state. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. @@ -361,7 +361,7 @@ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head needs_suppliers; - struct list_head defer_hook; + struct list_head defer_sync; bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From c84b90909e475a2eb4934b4d92fdd10e73e75805 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:21 -0800 Subject: Revert "driver core: fw_devlink: Add support for batching fwnode parsing" This reverts commit 716a7a25969003d82ab738179c3f1068a120ed11. The fw_devlink_pause/resume() APIs added by the commit being reverted were a first cut attempt at optimizing boot time. But these APIs don't fully solve the problem and are very fragile (can only be used for the top level devices being added). This series replaces them with a much better optimization that works for all device additions and also has the benefit of reducing the complexity of the firmware (DT, EFI) specific code and abstracting out common code to driver core. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-7-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9506f8ec0974..e0abafbb17f8 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -171,7 +171,5 @@ struct fwnode_operations { #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) extern u32 fw_devlink_get_flags(void); -void fw_devlink_pause(void); -void fw_devlink_resume(void); #endif -- cgit v1.2.3 From 01bb86b380a306bd937c96da36f66429f3362137 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:22 -0800 Subject: driver core: Add fwnode_init() There are multiple locations in the kernel where a struct fwnode_handle is initialized. Add fwnode_init() so that we have one way of initializing a fwnode_handle. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-8-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 6 ++++++ include/linux/of.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index e0abafbb17f8..5589799708b5 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -170,6 +170,12 @@ struct fwnode_operations { } while (false) #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) +static inline void fwnode_init(struct fwnode_handle *fwnode, + const struct fwnode_operations *ops) +{ + fwnode->ops = ops; +} + extern u32 fw_devlink_get_flags(void); #endif diff --git a/include/linux/of.h b/include/linux/of.h index af655d264f10..df71a3a1bb8d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -108,7 +108,7 @@ static inline void of_node_init(struct device_node *node) #if defined(CONFIG_OF_KOBJ) kobject_init(&node->kobj, &of_node_ktype); #endif - node->fwnode.ops = &of_fwnode_ops; + fwnode_init(&node->fwnode, &of_fwnode_ops); } #if defined(CONFIG_OF_KOBJ) -- cgit v1.2.3 From 7b337cb3ebde384cba7405b61dfb84200bf623bf Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:23 -0800 Subject: driver core: Add fwnode link support Add support for creating supplier-consumer links between fwnodes. It is intended for internal use the driver core and generic firmware support code (eg. Device Tree, ACPI), so it is simple by design and the API provided is limited. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-9-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5589799708b5..b88365187347 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -10,6 +10,7 @@ #define _LINUX_FWNODE_H_ #include +#include struct fwnode_operations; struct device; @@ -18,6 +19,15 @@ struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; struct device *dev; + struct list_head suppliers; + struct list_head consumers; +}; + +struct fwnode_link { + struct fwnode_handle *supplier; + struct list_head s_hook; + struct fwnode_handle *consumer; + struct list_head c_hook; }; /** @@ -174,8 +184,12 @@ static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) { fwnode->ops = ops; + INIT_LIST_HEAD(&fwnode->consumers); + INIT_LIST_HEAD(&fwnode->suppliers); } extern u32 fw_devlink_get_flags(void); +int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); +void fwnode_links_purge(struct fwnode_handle *fwnode); #endif -- cgit v1.2.3 From b5d3e2fbcb10957521af14c4256cd0e5f68b9234 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:25 -0800 Subject: device property: Add fwnode_is_ancestor_of() and fwnode_get_next_parent_dev() Add fwnode_is_ancestor_of() helper function to check if a fwnode is an ancestor of another fwnode. Add fwnode_get_next_parent_dev() helper function that take as input a fwnode and finds the closest ancestor fwnode that has a corresponding struct device and returns that struct device. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-11-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 2d4542629d80..0a9001fe7aea 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -85,9 +85,12 @@ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); +struct device *fwnode_get_next_parent_dev(struct fwnode_handle *fwnode); unsigned int fwnode_count_parents(const struct fwnode_handle *fwn); struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn, unsigned int depth); +bool fwnode_is_ancestor_of(struct fwnode_handle *test_ancestor, + struct fwnode_handle *test_child); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_available_child_node( -- cgit v1.2.3 From 04f63c213b671d89db35f4239f57fa1edeb736a8 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:26 -0800 Subject: driver core: Redefine the meaning of fwnode_operations.add_links() Change the meaning of fwnode_operations.add_links() to just create fwnode links by parsing the properties of a given fwnode. This patch doesn't actually make any code changes. To keeps things more digestable, the actual functional changes come in later patches in this series. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-12-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 42 +++--------------------------------------- 1 file changed, 3 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index b88365187347..942a6bb18201 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -78,44 +78,8 @@ struct fwnode_reference_args { * endpoint node. * @graph_get_port_parent: Return the parent node of a port node. * @graph_parse_endpoint: Parse endpoint for port and endpoint id. - * @add_links: Called after the device corresponding to the fwnode is added - * using device_add(). The function is expected to create device - * links to all the suppliers of the device that are available at - * the time this function is called. The function must NOT stop - * at the first failed device link if other unlinked supplier - * devices are present in the system. This is necessary for the - * driver/bus sync_state() callbacks to work correctly. - * - * For example, say Device-C depends on suppliers Device-S1 and - * Device-S2 and the dependency is listed in that order in the - * firmware. Say, S1 gets populated from the firmware after - * late_initcall_sync(). Say S2 is populated and probed way - * before that in device_initcall(). When C is populated, if this - * add_links() function doesn't continue past a "failed linking to - * S1" and continue linking C to S2, then S2 will get a - * sync_state() callback before C is probed. This is because from - * the perspective of S2, C was never a consumer when its - * sync_state() evaluation is done. To avoid this, the add_links() - * function has to go through all available suppliers of the - * device (that corresponds to this fwnode) and link to them - * before returning. - * - * If some suppliers are not yet available (indicated by an error - * return value), this function will be called again when other - * devices are added to allow creating device links to any newly - * available suppliers. - * - * Return 0 if device links have been successfully created to all - * the known suppliers of this device or if the supplier - * information is not known. - * - * Return -ENODEV if the suppliers needed for probing this device - * have not been registered yet (because device links can only be - * created to devices registered with the driver core). - * - * Return -EAGAIN if some of the suppliers of this device have not - * been registered yet, but none of those suppliers are necessary - * for probing the device. + * @add_links: Create fwnode links to all the suppliers of the fwnode. Return + * zero on success, a negative error code otherwise. */ struct fwnode_operations { struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); @@ -155,7 +119,7 @@ struct fwnode_operations { (*graph_get_port_parent)(struct fwnode_handle *fwnode); int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); - int (*add_links)(const struct fwnode_handle *fwnode, + int (*add_links)(struct fwnode_handle *fwnode, struct device *dev); }; -- cgit v1.2.3 From c2c724c868c42c5166bf7aa644dd0a0c8d30b47a Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:27 -0800 Subject: driver core: Add fw_devlink_parse_fwtree() This function is a wrapper around fwnode_operations.add_links(). This function parses each node in a fwnode tree and create fwnode links for each of those nodes. The information for creating the fwnode links (the supplier and consumer fwnode) is obtained by parsing the properties in each of the fwnodes. This function also ensures that no fwnode is parsed more than once by marking the fwnodes as parsed. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-13-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 942a6bb18201..ffa9129182a6 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -15,12 +15,20 @@ struct fwnode_operations; struct device; +/* + * fwnode link flags + * + * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. + */ +#define FWNODE_FLAG_LINKS_ADDED BIT(0) + struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; struct device *dev; struct list_head suppliers; struct list_head consumers; + u8 flags; }; struct fwnode_link { -- cgit v1.2.3 From f9aa460672c9c56896cdc12a521159e3e67000ba Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:31 -0800 Subject: driver core: Refactor fw_devlink feature The current implementation of fw_devlink is very inefficient because it tries to get away without creating fwnode links in the name of saving memory usage. Past attempts to optimize runtime at the cost of memory usage were blocked with request for data showing that the optimization made significant improvement for real world scenarios. We have those scenarios now. There have been several reports of boot time increase in the order of seconds in this thread [1]. Several OEMs and SoC manufacturers have also privately reported significant (350-400ms) increase in boot time due to all the parsing done by fw_devlink. So this patch uses all the setup done by the previous patches in this series to refactor fw_devlink to be more efficient. Most of the code has been moved out of firmware specific (DT mostly) code into driver core. This brings the following benefits: - Instead of parsing the device tree multiple times during bootup, fw_devlink parses each fwnode node/property only once and creates fwnode links. The rest of the fw_devlink code then just looks at these fwnode links to do rest of the work. - Makes it much easier to debug probe issue due to fw_devlink in the future. fw_devlink=on blocks the probing of devices if they depend on a device that hasn't been added yet. With this refactor, it'll be very easy to tell what that device is because we now have a reference to the fwnode of the device. - Much easier to add fw_devlink support to ACPI and other firmware types. A refactor to move the common bits from DT specific code to driver core was in my TODO list as a prerequisite to adding ACPI support to fw_devlink. This series gets that done. [1] - https://lore.kernel.org/linux-omap/ea02f57e-871d-cd16-4418-c1da4bbc4696@ti.com/ Tested-by: Laurent Pinchart Tested-by: Grygorii Strashko Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-17-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 1e771ea4dca6..89bb8b84173e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -351,18 +351,13 @@ enum dl_dev_state { * struct dev_links_info - Device data related to device links. * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. - * @needs_suppliers: Hook to global list of devices waiting for suppliers. * @defer_sync: Hook to global list of devices that have deferred sync_state. - * @need_for_probe: If needs_suppliers is on a list, this indicates if the - * suppliers are needed for probe or not. * @status: Driver status information. */ struct dev_links_info { struct list_head suppliers; struct list_head consumers; - struct list_head needs_suppliers; struct list_head defer_sync; - bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From 2d09e6eb4a6f20273959f4905ccf009da8c64c7a Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:32 -0800 Subject: driver core: Delete pointless parameter in fwnode_operations.add_links The struct device input to add_links() is not used for anything. So delete it. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-18-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ffa9129182a6..fde4ad97564c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -127,8 +127,7 @@ struct fwnode_operations { (*graph_get_port_parent)(struct fwnode_handle *fwnode); int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); - int (*add_links)(struct fwnode_handle *fwnode, - struct device *dev); + int (*add_links)(struct fwnode_handle *fwnode); }; #define fwnode_has_op(fwnode, op) \ -- cgit v1.2.3 From 30b79eb1f92ed5974885d374a4107c94e2dd3e03 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Mon, 9 Nov 2020 14:48:17 +0100 Subject: soc: xilinx: vcu: use vcu-settings syscon registers Switch the "logicoreip" registers to the new xlnx,vcu-settings binding to be able to read the settings if the settings are specified in a separate device tree node that is shared with other drivers. If the driver is not able to find a node with the new binding, fall back to check for the logicore register bank to be backwards compatible. Signed-off-by: Michael Tretter Reviewed-by: Hyun Kwon Link: https://lore.kernel.org/r/20201109134818.4159342-4-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- include/linux/mfd/syscon/xlnx-vcu.h | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/linux/mfd/syscon/xlnx-vcu.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/xlnx-vcu.h b/include/linux/mfd/syscon/xlnx-vcu.h new file mode 100644 index 000000000000..d3edec4b7b1d --- /dev/null +++ b/include/linux/mfd/syscon/xlnx-vcu.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Pengutronix, Michael Tretter + */ + +#ifndef __XLNX_VCU_H +#define __XLNX_VCU_H + +#define VCU_ECODER_ENABLE 0x00 +#define VCU_DECODER_ENABLE 0x04 +#define VCU_MEMORY_DEPTH 0x08 +#define VCU_ENC_COLOR_DEPTH 0x0c +#define VCU_ENC_VERTICAL_RANGE 0x10 +#define VCU_ENC_FRAME_SIZE_X 0x14 +#define VCU_ENC_FRAME_SIZE_Y 0x18 +#define VCU_ENC_COLOR_FORMAT 0x1c +#define VCU_ENC_FPS 0x20 +#define VCU_MCU_CLK 0x24 +#define VCU_CORE_CLK 0x28 +#define VCU_PLL_BYPASS 0x2c +#define VCU_ENC_CLK 0x30 +#define VCU_PLL_CLK 0x34 +#define VCU_ENC_VIDEO_STANDARD 0x38 +#define VCU_STATUS 0x3c +#define VCU_AXI_ENC_CLK 0x40 +#define VCU_AXI_DEC_CLK 0x44 +#define VCU_AXI_MCU_CLK 0x48 +#define VCU_DEC_VIDEO_STANDARD 0x4c +#define VCU_DEC_FRAME_SIZE_X 0x50 +#define VCU_DEC_FRAME_SIZE_Y 0x54 +#define VCU_DEC_FPS 0x58 +#define VCU_BUFFER_B_FRAME 0x5c +#define VCU_WPP_EN 0x60 +#define VCU_PLL_CLK_DEC 0x64 +#define VCU_GASKET_INIT 0x74 +#define VCU_GASKET_VALUE 0x03 + +#endif /* __XLNX_VCU_H */ -- cgit v1.2.3 From 7b1c9b8441aa94a549a90fa3d42687ccbad3eade Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Mon, 9 Nov 2020 14:48:18 +0100 Subject: soc: xilinx: vcu: add missing register NUM_CORE The H.264/H.265 Video Codec Unit v1.2 documentation describes this register as follows: Number of encoders core used for the provided configuration This is required for configuring the VCU encoder buffer. Signed-off-by: Michael Tretter Reviewed-by: Hyun Kwon Link: https://lore.kernel.org/r/20201109134818.4159342-5-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- include/linux/mfd/syscon/xlnx-vcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/xlnx-vcu.h b/include/linux/mfd/syscon/xlnx-vcu.h index d3edec4b7b1d..ff7bc3656f6e 100644 --- a/include/linux/mfd/syscon/xlnx-vcu.h +++ b/include/linux/mfd/syscon/xlnx-vcu.h @@ -32,6 +32,7 @@ #define VCU_BUFFER_B_FRAME 0x5c #define VCU_WPP_EN 0x60 #define VCU_PLL_CLK_DEC 0x64 +#define VCU_NUM_CORE 0x6c #define VCU_GASKET_INIT 0x74 #define VCU_GASKET_VALUE 0x03 -- cgit v1.2.3 From 463edf5a59fd8f0fe0135101d67bfca81d1e3771 Mon Sep 17 00:00:00 2001 From: Wendy Liang Date: Tue, 24 Nov 2020 00:18:18 -0800 Subject: firmware: xlnx-zynqmp: fix compilation warning Fix compilation warning when ZYNQMP_FIRMWARE is not defined. include/linux/firmware/xlnx-zynqmp.h: In function 'zynqmp_pm_get_eemi_ops': include/linux/firmware/xlnx-zynqmp.h:363:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration] 363 | return ERR_PTR(-ENODEV); include/linux/firmware/xlnx-zynqmp.h:363:18: note: each undeclared identifier is reported only once for each function it appears in include/linux/firmware/xlnx-zynqmp.h: In function 'zynqmp_pm_get_api_version': include/linux/firmware/xlnx-zynqmp.h:367:10: error: 'ENODEV' undeclared (first use in this function) 367 | return -ENODEV; | ^~~~~~ Signed-off-by: Wendy Liang Link: https://lore.kernel.org/r/1606205898-12642-1-git-send-email-wendy.liang@xilinx.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5968df82b991..f84244ea633b 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -13,6 +13,8 @@ #ifndef __FIRMWARE_ZYNQMP_H__ #define __FIRMWARE_ZYNQMP_H__ +#include + #define ZYNQMP_PM_VERSION_MAJOR 1 #define ZYNQMP_PM_VERSION_MINOR 0 -- cgit v1.2.3 From 1f6a11a01059f9c65f8461987cc0bab4c0b58338 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 2 Dec 2020 08:38:48 +0100 Subject: firmware: xilinx: Remove additional newline This additional newline is useless and also reported by checkpatch --strict. Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/d927f3f2c97910958dd77a22828cd0bf8d89c9de.1606894725.git.michal.simek@xilinx.com --- include/linux/firmware/xlnx-zynqmp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index f84244ea633b..0db9005782d6 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -316,7 +316,6 @@ struct zynqmp_pm_query_data { u32 arg3; }; - int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 *ret_payload); -- cgit v1.2.3 From a80cefec2c2783166727324bde724c39aa8a12df Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 2 Dec 2020 08:38:49 +0100 Subject: firmware: xilinx: Add a blank line after function declaration Fix all these issues which are also reported by checkpatch --strict. Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/7b6007e05f6c01214861a37f198cd5bee62a4d3e.1606894725.git.michal.simek@xilinx.com --- include/linux/firmware/xlnx-zynqmp.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 0db9005782d6..0e7e72650ed3 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -363,107 +363,132 @@ static inline struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) { return ERR_PTR(-ENODEV); } + static inline int zynqmp_pm_get_api_version(u32 *version) { return -ENODEV; } + static inline int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) { return -ENODEV; } + static inline int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out) { return -ENODEV; } + static inline int zynqmp_pm_clock_enable(u32 clock_id) { return -ENODEV; } + static inline int zynqmp_pm_clock_disable(u32 clock_id) { return -ENODEV; } + static inline int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state) { return -ENODEV; } + static inline int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider) { return -ENODEV; } + static inline int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider) { return -ENODEV; } + static inline int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate) { return -ENODEV; } + static inline int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate) { return -ENODEV; } + static inline int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id) { return -ENODEV; } + static inline int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id) { return -ENODEV; } + static inline int zynqmp_pm_set_pll_frac_mode(u32 clk_id, u32 mode) { return -ENODEV; } + static inline int zynqmp_pm_get_pll_frac_mode(u32 clk_id, u32 *mode) { return -ENODEV; } + static inline int zynqmp_pm_set_pll_frac_data(u32 clk_id, u32 data) { return -ENODEV; } + static inline int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data) { return -ENODEV; } + static inline int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value) { return -ENODEV; } + static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type) { return -ENODEV; } + static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } + static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status) { return -ENODEV; } + static inline int zynqmp_pm_init_finalize(void) { return -ENODEV; } + static inline int zynqmp_pm_set_suspend_mode(u32 mode) { return -ENODEV; } + static inline int zynqmp_pm_request_node(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack) { return -ENODEV; } + static inline int zynqmp_pm_release_node(const u32 node) { return -ENODEV; } + static inline int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, @@ -471,39 +496,48 @@ static inline int zynqmp_pm_set_requirement(const u32 node, { return -ENODEV; } + static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) { return -ENODEV; } + static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags) { return -ENODEV; } + static inline int zynqmp_pm_fpga_get_status(u32 *value) { return -ENODEV; } + static inline int zynqmp_pm_write_ggs(u32 index, u32 value) { return -ENODEV; } + static inline int zynqmp_pm_read_ggs(u32 index, u32 *value) { return -ENODEV; } + static inline int zynqmp_pm_write_pggs(u32 index, u32 value) { return -ENODEV; } + static inline int zynqmp_pm_read_pggs(u32 index, u32 *value) { return -ENODEV; } + static inline int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype) { return -ENODEV; } + static inline int zynqmp_pm_set_boot_health_status(u32 value) { return -ENODEV; -- cgit v1.2.3 From 311c2520de21cb2f44291ad3d984b42191126628 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 2 Dec 2020 08:38:50 +0100 Subject: firmware: xilinx: Properly align function parameter Fix parameters alignment reported by checkpatch --strict. Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/00ed9fcb94a6c22eff1fe8afdea46b2764a8687d.1606894725.git.michal.simek@xilinx.com --- include/linux/firmware/xlnx-zynqmp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 0e7e72650ed3..edc2977b26d9 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -456,7 +456,7 @@ static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type) } static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, - const enum zynqmp_pm_reset_action assert_flag) + const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } @@ -490,9 +490,9 @@ static inline int zynqmp_pm_release_node(const u32 node) } static inline int zynqmp_pm_set_requirement(const u32 node, - const u32 capabilities, - const u32 qos, - const enum zynqmp_pm_request_ack ack) + const u32 capabilities, + const u32 qos, + const enum zynqmp_pm_request_ack ack) { return -ENODEV; } -- cgit v1.2.3 From 5b4258f6721f41b092c63f6ee71be76e9616718b Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Wed, 2 Dec 2020 14:58:57 +0800 Subject: misc: rtsx: rts5249 support runtime PM rtsx_pcr: add callback functions to support runtime PM add delay_work to put device to D3 after idle over 10 sec rts5249: add extra init flow for rtd3 and set rtd3_en from config setting rtsx_pci_sdmmc: child device support autosuspend Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20201202065857.19412-1-ricky_wu@realtek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 745f5e73f99a..f895ccabbe29 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1174,6 +1174,7 @@ struct rtsx_pcr { struct delayed_work carddet_work; struct delayed_work idle_work; + struct delayed_work rtd3_work; spinlock_t lock; struct mutex pcr_mutex; @@ -1183,6 +1184,7 @@ struct rtsx_pcr { unsigned int cur_clock; bool remove_pci; bool msi_en; + bool is_runtime_suspended; #define EXTRA_CAPS_SD_SDR50 (1 << 0) #define EXTRA_CAPS_SD_SDR104 (1 << 1) -- cgit v1.2.3 From 8010622c86ca5bb44bc98492f5968726fc7c7a21 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 9 Dec 2020 16:26:39 +0100 Subject: USB: UAS: introduce a quirk to set no_write_same UAS does not share the pessimistic assumption storage is making that devices cannot deal with WRITE_SAME. A few devices supported by UAS, are reported to not deal well with WRITE_SAME. Those need a quirk. Add it to the device that needs it. Reported-by: David C. Partridge Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20201209152639.9195-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 4a19ac3f24d0..6b03fdd69d27 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -84,6 +84,8 @@ /* Cannot handle REPORT_LUNS */ \ US_FLAG(ALWAYS_SYNC, 0x20000000) \ /* lies about caching, so always sync */ \ + US_FLAG(NO_SAME, 0x40000000) \ + /* Cannot handle WRITE_SAME */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From c73ebb685fb6dfb513d394cbea64fb81ba3d994f Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 3 Nov 2020 10:54:37 +0800 Subject: io_uring: add timeout support for io_uring_enter() Now users who want to get woken when waiting for events should submit a timeout command first. It is not safe for applications that split SQ and CQ handling between two threads, such as mysql. Users should synchronize the two threads explicitly to protect SQ and that will impact the performance. This patch adds support for timeout to existing io_uring_enter(). To avoid overloading arguments, it introduces a new parameter structure which contains sigmask and timeout. I have tested the workloads with one thread submiting nop requests while the other reaping the cqe with timeout. It shows 1.8~2x faster when the iodepth is 16. Signed-off-by: Jiufei Xue Signed-off-by: Hao Xu [axboe: various cleanups/fixes, and name change to SIG_IS_DATA] Signed-off-by: Jens Axboe --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..8576e8bf92fe 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, u32 min_complete, u32 flags, - const sigset_t __user *sig, size_t sigsz); + const void __user *argp, size_t argsz); asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); -- cgit v1.2.3 From 5a6338cce9f4133c478d3b10b300f96dd644379a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Nov 2020 15:32:06 +0530 Subject: mailbox: arm_mhuv2: Add driver This adds driver for the ARM MHUv2 (Message Handling Unit) mailbox controller. This is based on the accepted DT bindings of the controller and supports combination of both transport protocols, i.e. doorbell and data-transfer. Transmitting and receiving data through the mailbox framework is done through struct arm_mhuv2_mbox_msg. Based on the initial work done by Morten Borup Petersen from ARM. Co-developed-by: Tushar Khandelwal Signed-off-by: Tushar Khandelwal Tested-by: Usama Arif Signed-off-by: Viresh Kumar Signed-off-by: Jassi Brar --- include/linux/mailbox/arm_mhuv2_message.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/mailbox/arm_mhuv2_message.h (limited to 'include/linux') diff --git a/include/linux/mailbox/arm_mhuv2_message.h b/include/linux/mailbox/arm_mhuv2_message.h new file mode 100644 index 000000000000..821b9d96daa4 --- /dev/null +++ b/include/linux/mailbox/arm_mhuv2_message.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM MHUv2 Mailbox Message + * + * Copyright (C) 2020 Arm Ltd. + * Copyright (C) 2020 Linaro Ltd. + */ + +#ifndef _LINUX_ARM_MHUV2_MESSAGE_H_ +#define _LINUX_ARM_MHUV2_MESSAGE_H_ + +#include + +/* Data structure for data-transfer protocol */ +struct arm_mhuv2_mbox_msg { + void *data; + size_t len; +}; + +#endif /* _LINUX_ARM_MHUV2_MESSAGE_H_ */ -- cgit v1.2.3 From d40c2d4ed62df64ce603c208bceff25245380157 Mon Sep 17 00:00:00 2001 From: Hsin-Hsiung Wang Date: Wed, 9 Dec 2020 18:33:43 -0800 Subject: spmi: Add driver shutdown support Add new shutdown() method. Use it in the standard driver model style. Link: https://lore.kernel.org/r/1603187810-30481-2-git-send-email-hsin-hsiung.wang@mediatek.com Signed-off-by: Hsin-Hsiung Wang Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20201210023344.2838141-4-sboyd@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/spmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spmi.h b/include/linux/spmi.h index 394a3f68bad5..729bcbf9f5ad 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -138,6 +138,7 @@ struct spmi_driver { struct device_driver driver; int (*probe)(struct spmi_device *sdev); void (*remove)(struct spmi_device *sdev); + void (*shutdown)(struct spmi_device *sdev); }; static inline struct spmi_driver *to_spmi_driver(struct device_driver *d) -- cgit v1.2.3 From 1c12c27086dcef853832a7cbebcb48bdac8104b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 25 Nov 2020 10:31:06 +0100 Subject: siox: Make remove callback return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of the remove callback, so don't give siox drivers the chance to provide a value. All siox drivers only allocate devm-managed resources in .probe, so there is no .remove callback to fix. Tested-by: Thorsten Scherer Acked-by: Thorsten Scherer Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20201125093106.240643-3-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/siox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/siox.h b/include/linux/siox.h index da7225bf1877..6bfbda3f634c 100644 --- a/include/linux/siox.h +++ b/include/linux/siox.h @@ -36,7 +36,7 @@ bool siox_device_connected(struct siox_device *sdevice); struct siox_driver { int (*probe)(struct siox_device *sdevice); - int (*remove)(struct siox_device *sdevice); + void (*remove)(struct siox_device *sdevice); void (*shutdown)(struct siox_device *sdevice); /* -- cgit v1.2.3 From 0aec2da436623abe19b80b21dd9fc5ec9300a152 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Dec 2020 22:36:38 +0200 Subject: driver core: platform: Introduce platform_get_mem_or_io() There are at least few existing users of the proposed API which retrieves either MEM or IO resource from platform device. Make it common to utilize in the existing and new users. Cc: Eric Auger Cc: Alex Williamson Cc: kvm@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: Peng Hao Cc: Arnd Bergmann Reviewed-by: Cornelia Huck Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201209203642.27648-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 77a2aada106d..ee6a9f10c2c7 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -52,6 +52,9 @@ extern struct device platform_bus; extern struct resource *platform_get_resource(struct platform_device *, unsigned int, unsigned int); +extern struct resource *platform_get_mem_or_io(struct platform_device *, + unsigned int); + extern struct device * platform_find_device_by_driver(struct device *start, const struct device_driver *drv); -- cgit v1.2.3 From 1f702603e7125a390b5cdf5ce00539781cfcc86a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:19 -0600 Subject: exec: Simplify unshare_files Now that exec no longer needs to return the unshared files to their previous value there is no reason to return displaced. Instead when unshare_fd creates a copy of the file table, call put_files_struct before returning from unshare_files. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-2-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-2-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a32bf47c593e..f46a084b60b2 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -109,7 +109,7 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); -int unshare_files(struct files_struct **); +int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, -- cgit v1.2.3 From 950db38ff2c01b7aabbd7ab4a50b7992750fa63d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:20 -0600 Subject: exec: Remove reset_files_struct Now that exec no longer needs to restore the previous value of current->files on error there are no more callers of reset_files_struct so remove it. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-3-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-3-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f46a084b60b2..7cc9885044d9 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -108,7 +108,6 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); -void reset_files_struct(struct files_struct *); int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); -- cgit v1.2.3 From bebf684bf330915e6c96313ad7db89a5480fc9c2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:24 -0600 Subject: file: Rename __fcheck_files to files_lookup_fd_raw The function fcheck despite it's comment is poorly named as it has no callers that only check it's return value. All of fcheck's callers use the returned file descriptor. The same is true for fcheck_files and __fcheck_files. A new less confusing name is needed. In addition the names of these functions are confusing as they do not report the kind of locks that are needed to be held when these functions are called making error prone to use them. To remedy this I am making the base functio name lookup_fd and will and prefixes and sufficies to indicate the rest of the context. Name the function (previously called __fcheck_files) that proceeds from a struct files_struct, looks up the struct file of a file descriptor, and requires it's callers to verify all of the appropriate locks are held files_lookup_fd_raw. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-7-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 7cc9885044d9..639933f37da9 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -80,7 +80,7 @@ struct dentry; /* * The caller must ensure that fd table isn't shared or hold rcu or file lock */ -static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd) +static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); @@ -96,7 +96,7 @@ static inline struct file *fcheck_files(struct files_struct *files, unsigned int RCU_LOCKDEP_WARN(!rcu_read_lock_held() && !lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); - return __fcheck_files(files, fd); + return files_lookup_fd_raw(files, fd); } /* -- cgit v1.2.3 From 120ce2b0cd52abe73e8b16c23461eb14df5a87d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:25 -0600 Subject: file: Factor files_lookup_fd_locked out of fcheck_files To make it easy to tell where files->file_lock protection is being used when looking up a file create files_lookup_fd_locked. Only allow this function to be called with the file_lock held. Update the callers of fcheck and fcheck_files that are called with the files->file_lock held to call files_lookup_fd_locked instead. Hopefully this makes it easier to quickly understand what is going on. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-8-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 639933f37da9..fda4b81dd735 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -91,6 +91,13 @@ static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsig return NULL; } +static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) +{ + RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock), + "suspicious rcu_dereference_check() usage"); + return files_lookup_fd_raw(files, fd); +} + static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) { RCU_LOCKDEP_WARN(!rcu_read_lock_held() && -- cgit v1.2.3 From f36c2943274199cb8aef32ac96531ffb7c4b43d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:26 -0600 Subject: file: Replace fcheck_files with files_lookup_fd_rcu This change renames fcheck_files to files_lookup_fd_rcu. All of the remaining callers take the rcu_read_lock before calling this function so the _rcu suffix is appropriate. This change also tightens up the debug check to verify that all callers hold the rcu_read_lock. All callers that used to call files_check with the files->file_lock held have now been changed to call files_lookup_fd_locked. This change of name has helped remind me of which locks and which guarantees are in place helping me to catch bugs later in the patchset. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-9-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fda4b81dd735..fa8c402a7790 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -98,10 +98,9 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) +static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - !lockdep_is_held(&files->file_lock), + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious rcu_dereference_check() usage"); return files_lookup_fd_raw(files, fd); } @@ -109,7 +108,7 @@ static inline struct file *fcheck_files(struct files_struct *files, unsigned int /* * Check whether the specified fd has an open file. */ -#define fcheck(fd) fcheck_files(current->files, fd) +#define fcheck(fd) files_lookup_fd_rcu(current->files, fd) struct task_struct; -- cgit v1.2.3 From 460b4f812a9d473d4b39d87d37844f9fc30a9eb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:27 -0600 Subject: file: Rename fcheck lookup_fd_rcu Also remove the confusing comment about checking if a fd exists. I could not find one instance in the entire kernel that still matches the description or the reason for the name fcheck. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-10-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fa8c402a7790..2a4a8fed536e 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -105,10 +105,10 @@ static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsig return files_lookup_fd_raw(files, fd); } -/* - * Check whether the specified fd has an open file. - */ -#define fcheck(fd) files_lookup_fd_rcu(current->files, fd) +static inline struct file *lookup_fd_rcu(unsigned int fd) +{ + return files_lookup_fd_rcu(current->files, fd); +} struct task_struct; -- cgit v1.2.3 From 3a879fb38082125cc0d8aa89b70c7f3a7cdf584b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:28 -0600 Subject: file: Implement task_lookup_fd_rcu As a companion to lookup_fd_rcu implement task_lookup_fd_rcu for querying an arbitrary process about a specific file. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200818103713.aw46m7vprsy4vlve@wittgenstein Link: https://lkml.kernel.org/r/20201120231441.29911-11-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 2a4a8fed536e..a0558ae9b40c 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -110,6 +110,8 @@ static inline struct file *lookup_fd_rcu(unsigned int fd) return files_lookup_fd_rcu(current->files, fd); } +struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); + struct task_struct; struct files_struct *get_files_struct(struct task_struct *); -- cgit v1.2.3 From e9a53aeb5e0a838f10fcea74235664e7ad5e6e1a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:31 -0600 Subject: file: Implement task_lookup_next_fd_rcu As a companion to fget_task and task_lookup_fd_rcu implement task_lookup_next_fd_rcu that will return the struct file for the first file descriptor number that is equal or greater than the fd argument value, or NULL if there is no such struct file. This allows file descriptors of foreign processes to be iterated through safely, without needed to increment the count on files_struct. Some concern[1] has been expressed that this function takes the task_lock for each iteration and thus for each file descriptor. This place where this function will be called in a commonly used code path is for listing /proc//fd. I did some small benchmarks and did not see any measurable performance differences. For ordinary users ls is likely to stat each of the directory entries and tid_fd_mode called from tid_fd_revalidae has always taken the task lock for each file descriptor. So this does not look like it will be a big change in practice. At some point is will probably be worth changing put_files_struct to free files_struct after an rcu grace period so that task_lock won't be needed at all. [1] https://lkml.kernel.org/r/20200817220425.9389-10-ebiederm@xmission.com v1: https://lkml.kernel.org/r/20200817220425.9389-9-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-14-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a0558ae9b40c..cf6c52dae3a1 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -111,6 +111,7 @@ static inline struct file *lookup_fd_rcu(unsigned int fd) } struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); +struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); struct task_struct; -- cgit v1.2.3 From d74ba04d919ebe30bf47406819c18c6b50003d92 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:35 -0600 Subject: file: Merge __fd_install into fd_install The function __fd_install was added to support binder[1]. With binder fixed[2] there are no more users. As fd_install just calls __fd_install with "files=current->files", merge them together by transforming the files parameter into a local variable initialized to current->files. [1] f869e8a7f753 ("expose a low-level variant of fd_install() for binder") [2] 44d8047f1d87 ("binder: use standard functions to allocate fds") Acked-by: Christian Brauner v1:https://lkml.kernel.org/r/20200817220425.9389-14-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-18-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index cf6c52dae3a1..a5ec736d74a5 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -126,8 +126,6 @@ int iterate_fd(struct files_struct *, unsigned, extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); -extern void __fd_install(struct files_struct *files, - unsigned int fd, struct file *file); extern int __close_fd(struct files_struct *files, unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -- cgit v1.2.3 From aa384d10f3d06d4b85597ff5df41551262220e16 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:37 -0600 Subject: file: Merge __alloc_fd into alloc_fd The function __alloc_fd was added to support binder[1]. With binder fixed[2] there are no more users. As alloc_fd just calls __alloc_fd with "files=current->files", merge them together by transforming the files parameter into a local variable initialized to current->files. [1] dcfadfa4ec5a ("new helper: __alloc_fd()") [2] 44d8047f1d87 ("binder: use standard functions to allocate fds") Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-16-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-20-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a5ec736d74a5..dc476ae92f56 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -124,8 +124,6 @@ int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); -extern int __alloc_fd(struct files_struct *files, - unsigned start, unsigned end, unsigned flags); extern int __close_fd(struct files_struct *files, unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -- cgit v1.2.3 From 8760c909f54a82aaa6e76da19afe798a0c77c3c3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:38 -0600 Subject: file: Rename __close_fd to close_fd and remove the files parameter The function __close_fd was added to support binder[1]. Now that binder has been fixed to no longer need __close_fd[2] all calls to __close_fd pass current->files. Therefore transform the files parameter into a local variable initialized to current->files, and rename __close_fd to close_fd to reflect this change, and keep it in sync with the similar changes to __alloc_fd, and __fd_install. This removes the need for callers to care about the extra care that needs to be take if anything except current->files is passed, by limiting the callers to only operation on current->files. [1] 483ce1d4b8c3 ("take descriptor-related part of close() to file.c") [2] 44d8047f1d87 ("binder: use standard functions to allocate fds") Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-17-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-21-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 3 +-- include/linux/syscalls.h | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index dc476ae92f56..dad4a488ce60 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -124,8 +124,7 @@ int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); -extern int __close_fd(struct files_struct *files, - unsigned int fd); +extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern int __close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..9e055a0579f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1295,16 +1295,16 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } -extern int __close_fd(struct files_struct *files, unsigned int fd); +extern int close_fd(unsigned int fd); /* * In contrast to sys_close(), this stub does not check whether the syscall * should or should not be restarted, but returns the raw error codes from - * __close_fd(). + * close_fd(). */ static inline int ksys_close(unsigned int fd) { - return __close_fd(current->files, fd); + return close_fd(fd); } extern long do_sys_truncate(const char __user *pathname, loff_t length); -- cgit v1.2.3 From 1572bfdf21d4d50e51941498ffe0b56c2289f783 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:39 -0600 Subject: file: Replace ksys_close with close_fd Now that ksys_close is exactly identical to close_fd replace the one caller of ksys_close with close_fd. [1] https://lkml.kernel.org/r/20200818112020.GA17080@infradead.org Suggested-by: Christoph Hellwig Link: https://lkml.kernel.org/r/20201120231441.29911-22-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/syscalls.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9e055a0579f8..0f72f380db72 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1295,18 +1295,6 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } -extern int close_fd(unsigned int fd); - -/* - * In contrast to sys_close(), this stub does not check whether the syscall - * should or should not be restarted, but returns the raw error codes from - * close_fd(). - */ -static inline int ksys_close(unsigned int fd) -{ - return close_fd(fd); -} - extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) -- cgit v1.2.3 From 9fe83c43e71cdb8e5b9520bcb98706a2b3c680c8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:40 -0600 Subject: file: Rename __close_fd_get_file close_fd_get_file The function close_fd_get_file is explicitly a variant of __close_fd[1]. Now that __close_fd has been renamed close_fd, rename close_fd_get_file to be consistent with close_fd. When __alloc_fd, __close_fd and __fd_install were introduced the double underscore indicated that the function took a struct files_struct parameter. The function __close_fd_get_file never has so the naming has always been inconsistent. This just cleans things up so there are not any lingering mentions or references __close_fd left in the code. [1] 80cd795630d6 ("binder: fix use-after-free due to ksys_close() during fdget()") Link: https://lkml.kernel.org/r/20201120231441.29911-23-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index dad4a488ce60..4ed3589f9294 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -126,7 +126,7 @@ int iterate_fd(struct files_struct *, unsigned, extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -extern int __close_fd_get_file(unsigned int fd, struct file **res); +extern int close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); -- cgit v1.2.3 From fa67bf885e5211c7dce9514ef2877212c0a5e09e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:41 -0600 Subject: file: Remove get_files_struct When discussing[1] exec and posix file locks it was realized that none of the callers of get_files_struct fundamentally needed to call get_files_struct, and that by switching them to helper functions instead it will both simplify their code and remove unnecessary increments of files_struct.count. Those unnecessary increments can result in exec unnecessarily unsharing files_struct which breaking posix locks, and it can result in fget_light having to fallback to fget reducing system performance. Now that get_files_struct has no more users and can not cause the problems for posix file locking and fget_light remove get_files_struct so that it does not gain any new users. [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com Suggested-by: Oleg Nesterov Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-13-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-24-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 4ed3589f9294..d0e78174874a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -115,7 +115,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd) struct task_struct; -struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; -- cgit v1.2.3 From f7cfd871ae0c5008d94b6f66834e7845caa93c15 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2020 14:12:00 -0600 Subject: exec: Transform exec_update_mutex into a rw_semaphore Recently syzbot reported[0] that there is a deadlock amongst the users of exec_update_mutex. The problematic lock ordering found by lockdep was: perf_event_open (exec_update_mutex -> ovl_i_mutex) chown (ovl_i_mutex -> sb_writes) sendfile (sb_writes -> p->lock) by reading from a proc file and writing to overlayfs proc_pid_syscall (p->lock -> exec_update_mutex) While looking at possible solutions it occured to me that all of the users and possible users involved only wanted to state of the given process to remain the same. They are all readers. The only writer is exec. There is no reason for readers to block on each other. So fix this deadlock by transforming exec_update_mutex into a rw_semaphore named exec_update_lock that only exec takes for writing. Cc: Jann Horn Cc: Vasiliy Kulikov Cc: Al Viro Cc: Bernd Edlinger Cc: Oleg Nesterov Cc: Christopher Yeoh Cc: Cyrill Gorcunov Cc: Sargun Dhillon Cc: Christian Brauner Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Fixes: eea9673250db ("exec: Add exec_update_mutex to replace cred_guard_mutex") [0] https://lkml.kernel.org/r/00000000000063640c05ade8e3de@google.com Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/87ft4mbqen.fsf@x220.int.ebiederm.org Signed-off-by: Eric W. Biederman --- include/linux/sched/signal.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..4b6a8234d7fc 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -228,12 +228,13 @@ struct signal_struct { * credential calculations * (notably. ptrace) * Deprecated do not use in new code. - * Use exec_update_mutex instead. - */ - struct mutex exec_update_mutex; /* Held while task_struct is being - * updated during exec, and may have - * inconsistent permissions. + * Use exec_update_lock instead. */ + struct rw_semaphore exec_update_lock; /* Held while task_struct is + * being updated during exec, + * and may have inconsistent + * permissions. + */ } __randomize_layout; /* -- cgit v1.2.3 From adf60a870e9130c7883ec2ab798484e05f24db39 Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Thu, 19 Nov 2020 13:05:32 -0800 Subject: remoteproc: core: Add ops to enable custom coredump functionality Each remoteproc might have different requirements for coredumps and might want to choose the type of dumps it wants to collect. This change allows remoteproc drivers to specify their own custom dump function to be executed in place of rproc_coredump. If the coredump op is not specified by the remoteproc driver it will be set to rproc_coredump by default. Reviewed-by: Bjorn Andersson Signed-off-by: Siddharth Gupta Link: https://lore.kernel.org/r/1605819935-10726-2-git-send-email-sidgup@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index e8ac041c64d9..121c55eff3d2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -375,6 +375,7 @@ enum rsc_handling_status { * @get_boot_addr: get boot address to entry point specified in firmware * @panic: optional callback to react to system panic, core will delay * panic at least the returned number of milliseconds + * @coredump: collect firmware dump after the subsystem is shutdown */ struct rproc_ops { int (*prepare)(struct rproc *rproc); @@ -393,6 +394,7 @@ struct rproc_ops { int (*sanity_check)(struct rproc *rproc, const struct firmware *fw); u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); unsigned long (*panic)(struct rproc *rproc); + void (*coredump)(struct rproc *rproc); }; /** -- cgit v1.2.3 From abc72b646066075acf9121a2a68aad39f550813d Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Thu, 19 Nov 2020 13:05:33 -0800 Subject: remoteproc: coredump: Add minidump functionality This change adds a new kind of core dump mechanism which instead of dumping entire program segments of the firmware, dumps sections of the remoteproc memory which are sufficient to allow debugging the firmware. This function thus uses section headers instead of program headers during creation of the core dump elf. Reviewed-by: Bjorn Andersson Co-developed-by: Rishabh Bhatnagar Signed-off-by: Rishabh Bhatnagar Signed-off-by: Siddharth Gupta Link: https://lore.kernel.org/r/1605819935-10726-3-git-send-email-sidgup@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 121c55eff3d2..f28ee75d1005 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -657,6 +657,7 @@ int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); +void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, dma_addr_t da, size_t size, -- cgit v1.2.3 From e7708f5b10e205d6291bb495e645a03553b9768b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sun, 29 Nov 2020 23:07:39 +0000 Subject: PCI: Unify ECAM constants in native PCI Express drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ECAM-related constants to provide a set of standard constants defining memory address shift values to the byte-level address that can be used to access the PCI Express Configuration Space, and then move native PCI Express controller drivers to use the newly introduced definitions retiring driver-specific ones. Refactor pci_ecam_map_bus() function to use newly added constants so that limits to the bus, device function and offset (now limited to 4K as per the specification) are in place to prevent the defective or malicious caller from supplying incorrect configuration offset and thus targeting the wrong device when accessing extended configuration space. This refactor also allows for the ".bus_shift" initialisers to be dropped when the user is not using a custom value as a default value will be used as per the PCI Express Specification. Thanks to Qian Cai , Michael Walle , and Vladimir Oltean for reporting a pci_ecam_create() issue with .bus_shift and to Vladimir for proposing the fix. [bhelgaas: incorporate Vladimir's fix, update commit log] Suggested-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20201129230743.3006978-2-kw@linux.com Tested-by: Michael Walle Signed-off-by: Krzysztof Wilczyński Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Jon Derrick Reviewed-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 033ce74f02e8..65d3d83015c3 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -9,6 +9,33 @@ #include #include +/* + * Memory address shift values for the byte-level address that + * can be used when accessing the PCI Express Configuration Space. + */ + +/* + * Enhanced Configuration Access Mechanism (ECAM) + * + * See PCI Express Base Specification, Revision 5.0, Version 1.0, + * Section 7.2.2, Table 7-1, p. 677. + */ +#define PCIE_ECAM_BUS_SHIFT 20 /* Bus number */ +#define PCIE_ECAM_DEVFN_SHIFT 12 /* Device and Function number */ + +#define PCIE_ECAM_BUS_MASK 0xff +#define PCIE_ECAM_DEVFN_MASK 0xff +#define PCIE_ECAM_REG_MASK 0xfff /* Limit offset to a maximum of 4K */ + +#define PCIE_ECAM_BUS(x) (((x) & PCIE_ECAM_BUS_MASK) << PCIE_ECAM_BUS_SHIFT) +#define PCIE_ECAM_DEVFN(x) (((x) & PCIE_ECAM_DEVFN_MASK) << PCIE_ECAM_DEVFN_SHIFT) +#define PCIE_ECAM_REG(x) ((x) & PCIE_ECAM_REG_MASK) + +#define PCIE_ECAM_OFFSET(bus, devfn, where) \ + (PCIE_ECAM_BUS(bus) | \ + PCIE_ECAM_DEVFN(devfn) | \ + PCIE_ECAM_REG(where)) + /* * struct to hold pci ops and bus shift of the config window * for a PCI controller. -- cgit v1.2.3 From 9994bb3f36e3d181d9f0a078609038080cfd7a3d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:13 +0200 Subject: mtd: nand: ecc-bch: Create the software BCH engine Let's continue introducing the generic ECC engine abstraction in the NAND subsystem by instantiating a first ECC engine: the software BCH one. While at it, make a very tidy ecc_sw_bch_init() function and move all the sanity checks and user input management in nand_ecc_sw_bch_init_ctx(). This second helper will be called from the raw RAND core. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-10-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 16 ++++++++-------- include/linux/mtd/nand.h | 9 +++++++++ 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index ce005528e55f..22c92073b3dd 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -13,8 +13,8 @@ /** * struct nand_ecc_sw_bch_conf - private software BCH ECC engine structure - * @reqooblen: Save the actual user OOB length requested before overwriting it - * @spare_oobbuf: Spare OOB buffer if none is provided + * @req_ctx: Save request context and tweak the original request to fit the + * engine needs * @code_size: Number of bytes needed to store a code (one code per step) * @nsteps: Number of steps * @calc_buf: Buffer to use when calculating ECC bytes @@ -24,8 +24,7 @@ * @eccmask: XOR ecc mask, allows erased pages to be decoded as valid */ struct nand_ecc_sw_bch_conf { - unsigned int reqooblen; - void *spare_oobbuf; + struct nand_ecc_req_tweak_ctx req_ctx; unsigned int code_size; unsigned int nsteps; u8 *calc_buf; @@ -41,8 +40,9 @@ int nand_ecc_sw_bch_calculate(struct nand_device *nand, const unsigned char *buf, unsigned char *code); int nand_ecc_sw_bch_correct(struct nand_device *nand, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); -int nand_ecc_sw_bch_init(struct nand_device *nand); -void nand_ecc_sw_bch_cleanup(struct nand_device *nand); +int nand_ecc_sw_bch_init_ctx(struct nand_device *nand); +void nand_ecc_sw_bch_cleanup_ctx(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ @@ -61,12 +61,12 @@ static inline int nand_ecc_sw_bch_correct(struct nand_device *nand, return -ENOTSUPP; } -static inline int nand_ecc_sw_bch_init(struct nand_device *nand) +static inline int nand_ecc_sw_bch_init_ctx(struct nand_device *nand) { return -ENOTSUPP; } -static inline void nand_ecc_sw_bch_cleanup(struct nand_device *nand) {} +static inline void nand_ecc_sw_bch_cleanup_ctx(struct nand_device *nand) {} #endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 36e4fe08d0ea..df8548187713 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,15 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) +struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void); +#else +static inline struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void) +{ + return NULL; +} +#endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ + /** * struct nand_ecc_req_tweak_ctx - Help for automatically tweaking requests * @orig_req: Pointer to the original IO request -- cgit v1.2.3 From cbd87780bed580b585d2992f29077ac44950cb66 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:14 +0200 Subject: mtd: rawnand: Get rid of chip->ecc.priv nand_ecc_ctrl embeds a private pointer which only has a meaning in the sunxi driver. This structure will soon be deprecated, but as this field is actually not needed, let's just drop it. Cc: Maxime Ripard Cc: Chen-Yu Tsai Signed-off-by: Miquel Raynal Acked-by: Maxime Ripard Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-11-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 23623beaad1d..15743c2c2cab 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -302,7 +302,6 @@ static const struct nand_ecc_caps __name = { \ * @prepad: padding information for syndrome based ECC generators * @postpad: padding information for syndrome based ECC generators * @options: ECC specific options (see NAND_ECC_XXX flags defined above) - * @priv: pointer to private ECC control data * @calc_buf: buffer for calculated ECC, size is oobsize. * @code_buf: buffer for ECC read from flash, size is oobsize. * @hwctl: function to control hardware ECC generator. Must only @@ -355,7 +354,6 @@ struct nand_ecc_ctrl { int prepad; int postpad; unsigned int options; - void *priv; u8 *calc_buf; u8 *code_buf; void (*hwctl)(struct nand_chip *chip, int mode); -- cgit v1.2.3 From e5acf9c862974041f7b2f581d1a40ccd29769add Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:15 +0200 Subject: mtd: nand: ecc-hamming: Move Hamming code to the generic NAND layer Hamming ECC code might be later re-used by the SPI NAND layer. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-12-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 39 +++++++++++++++++++++++++++++++++ include/linux/mtd/nand_ecc.h | 39 --------------------------------- include/linux/mtd/sharpsl.h | 2 +- 3 files changed, 40 insertions(+), 40 deletions(-) create mode 100644 include/linux/mtd/nand-ecc-sw-hamming.h delete mode 100644 include/linux/mtd/nand_ecc.h (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h new file mode 100644 index 000000000000..30a0cfa50eed --- /dev/null +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2000-2010 Steven J. Hill + * David Woodhouse + * Thomas Gleixner + * + * This file is the header for the ECC algorithm. + */ + +#ifndef __MTD_NAND_ECC_SW_HAMMING_H__ +#define __MTD_NAND_ECC_SW_HAMMING_H__ + +struct nand_chip; + +/* + * Calculate 3 byte ECC code for eccsize byte block + */ +void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize, + u_char *ecc_code, bool sm_order); + +/* + * Calculate 3 byte ECC code for 256/512 byte block + */ +int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat, + u_char *ecc_code); + +/* + * Detect and correct a 1 bit error for eccsize byte block + */ +int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, + unsigned int eccsize, bool sm_order); + +/* + * Detect and correct a 1 bit error for 256/512 byte block + */ +int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc, + u_char *calc_ecc); + +#endif /* __MTD_NAND_ECC_SW_HAMMING_H__ */ diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h deleted file mode 100644 index d423916b94f0..000000000000 --- a/include/linux/mtd/nand_ecc.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2000-2010 Steven J. Hill - * David Woodhouse - * Thomas Gleixner - * - * This file is the header for the ECC algorithm. - */ - -#ifndef __MTD_NAND_ECC_H__ -#define __MTD_NAND_ECC_H__ - -struct nand_chip; - -/* - * Calculate 3 byte ECC code for eccsize byte block - */ -void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize, - u_char *ecc_code, bool sm_order); - -/* - * Calculate 3 byte ECC code for 256/512 byte block - */ -int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct a 1 bit error for eccsize byte block - */ -int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, - unsigned int eccsize, bool sm_order); - -/* - * Detect and correct a 1 bit error for 256/512 byte block - */ -int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc, - u_char *calc_ecc); - -#endif /* __MTD_NAND_ECC_H__ */ diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index d2c3cf29e0d1..3762a90077d6 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -9,7 +9,7 @@ #define _MTD_SHARPSL_H #include -#include +#include #include struct sharpsl_nand_platform_data { -- cgit v1.2.3 From 2dbe0192efa02f2f405e193f4de84bf07c7f91fb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:16 +0200 Subject: mtd: nand: ecc-hamming: Clarify the driver descriptions The include file pretends being the header for "ECC algorithm", while it is just the header for the Hamming implementation. Make this clear by rewording the sentence. Do the same with the module description. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-13-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 30a0cfa50eed..85e9a929b5f9 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -4,7 +4,7 @@ * David Woodhouse * Thomas Gleixner * - * This file is the header for the ECC algorithm. + * This file is the header for the NAND Hamming ECC implementation. */ #ifndef __MTD_NAND_ECC_SW_HAMMING_H__ -- cgit v1.2.3 From 90ccf0a0192f7fa06e52de80cb528c5217e3e297 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:19 +0200 Subject: mtd: nand: ecc-hamming: Rename the exported functions Prefix by ecc_sw_hamming_ the functions which should be internal only but are exported for "raw" operations. Prefix by nand_ecc_sw_hamming_ the other functions which will be used in the context of the declaration of an Hamming proper ECC engine object. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-16-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 36 +++++++++++---------------------- include/linux/mtd/rawnand.h | 7 +++++++ 2 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 85e9a929b5f9..84a123bf45f3 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -10,30 +10,18 @@ #ifndef __MTD_NAND_ECC_SW_HAMMING_H__ #define __MTD_NAND_ECC_SW_HAMMING_H__ -struct nand_chip; +#include -/* - * Calculate 3 byte ECC code for eccsize byte block - */ -void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize, - u_char *ecc_code, bool sm_order); - -/* - * Calculate 3 byte ECC code for 256/512 byte block - */ -int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct a 1 bit error for eccsize byte block - */ -int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, - unsigned int eccsize, bool sm_order); - -/* - * Detect and correct a 1 bit error for 256/512 byte block - */ -int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc, - u_char *calc_ecc); +int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, + unsigned char *code, bool sm_order); +int nand_ecc_sw_hamming_calculate(struct nand_device *nand, + const unsigned char *buf, + unsigned char *code); +int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc, + unsigned char *calc_ecc, unsigned int step_size, + bool sm_order); +int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc); #endif /* __MTD_NAND_ECC_SW_HAMMING_H__ */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 15743c2c2cab..685d24557e95 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1301,6 +1301,13 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } +int rawnand_sw_hamming_calculate(struct nand_chip *chip, + const unsigned char *buf, + unsigned char *code); +int rawnand_sw_hamming_correct(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc); int rawnand_sw_bch_init(struct nand_chip *chip); int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); -- cgit v1.2.3 From 19b2ce184b9f404d6620adf667a9019e6abcae51 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:20 +0200 Subject: mtd: nand: ecc-hamming: Stop using raw NAND structures This code is meant to be reused by the SPI-NAND core. Now that the driver has been cleaned and reorganized, use a generic ECC engine object to store the driver's data instead of accessing members of the nand_chip structure. This means adding proper init/cleanup helpers. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-17-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 20 ++++++++++++++++++++ include/linux/mtd/rawnand.h | 2 ++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 84a123bf45f3..9d4b4d623741 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -12,6 +12,26 @@ #include +/** + * struct nand_ecc_sw_hamming_conf - private software Hamming ECC engine structure + * @reqooblen: Save the actual user OOB length requested before overwriting it + * @spare_oobbuf: Spare OOB buffer if none is provided + * @code_size: Number of bytes needed to store a code (one code per step) + * @nsteps: Number of steps + * @calc_buf: Buffer to use when calculating ECC bytes + * @code_buf: Buffer to use when reading (raw) ECC bytes from the chip + * @sm_order: Smart Media special ordering + */ +struct nand_ecc_sw_hamming_conf { + unsigned int reqooblen; + void *spare_oobbuf; + unsigned int code_size; + unsigned int nsteps; + u8 *calc_buf; + u8 *code_buf; + unsigned int sm_order; +}; + int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order); int nand_ecc_sw_hamming_calculate(struct nand_device *nand, diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 685d24557e95..0a90a8792d18 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1301,6 +1301,7 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } +int rawnand_sw_hamming_init(struct nand_chip *chip); int rawnand_sw_hamming_calculate(struct nand_chip *chip, const unsigned char *buf, unsigned char *code); @@ -1308,6 +1309,7 @@ int rawnand_sw_hamming_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); +void rawnand_sw_hamming_cleanup(struct nand_chip *chip); int rawnand_sw_bch_init(struct nand_chip *chip); int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); -- cgit v1.2.3 From eb08376a5dd943cf2a7360f236fe20bbd709fa95 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:21 +0200 Subject: mtd: nand: ecc-hamming: Remove useless includes Most of the includes are simply useless, drop them. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-18-miquel.raynal@bootlin.com --- include/linux/mtd/sharpsl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index 3762a90077d6..231bd1c3f408 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -9,7 +9,6 @@ #define _MTD_SHARPSL_H #include -#include #include struct sharpsl_nand_platform_data { -- cgit v1.2.3 From 5180a62c12497aa491a7c79c062a9e3a884c9762 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:22 +0200 Subject: mtd: nand: ecc-hamming: Let the software Hamming ECC engine be unselected There is no reason to always embed the software Hamming ECC engine implementation. By default it is (with raw NAND), but we can let the user decide. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-19-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 9d4b4d623741..5a39e96c3546 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -32,6 +32,8 @@ struct nand_ecc_sw_hamming_conf { unsigned int sm_order; }; +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) + int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order); int nand_ecc_sw_hamming_calculate(struct nand_device *nand, @@ -44,4 +46,38 @@ int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); +#else /* !CONFIG_MTD_NAND_ECC_SW_HAMMING */ + +static inline int ecc_sw_hamming_calculate(const unsigned char *buf, + unsigned int step_size, + unsigned char *code, bool sm_order) +{ + return -ENOTSUPP; +} + +static inline int nand_ecc_sw_hamming_calculate(struct nand_device *nand, + const unsigned char *buf, + unsigned char *code) +{ + return -ENOTSUPP; +} + +static inline int ecc_sw_hamming_correct(unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc, + unsigned int step_size, bool sm_order) +{ + return -ENOTSUPP; +} + +static inline int nand_ecc_sw_hamming_correct(struct nand_device *nand, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return -ENOTSUPP; +} + +#endif /* CONFIG_MTD_NAND_ECC_SW_HAMMING */ + #endif /* __MTD_NAND_ECC_SW_HAMMING_H__ */ -- cgit v1.2.3 From 35fe1b98a0082ad3f576bcc420c74dab435da307 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:23 +0200 Subject: mtd: nand: ecc-hamming: Create the software Hamming engine Let's continue introducing the generic ECC engine abstraction in the NAND subsystem by instantiating a second ECC engine: software Hamming. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-20-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 16 ++++++++++++---- include/linux/mtd/nand.h | 9 +++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 5a39e96c3546..9f9073d86ff3 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -14,8 +14,8 @@ /** * struct nand_ecc_sw_hamming_conf - private software Hamming ECC engine structure - * @reqooblen: Save the actual user OOB length requested before overwriting it - * @spare_oobbuf: Spare OOB buffer if none is provided + * @req_ctx: Save request context and tweak the original request to fit the + * engine needs * @code_size: Number of bytes needed to store a code (one code per step) * @nsteps: Number of steps * @calc_buf: Buffer to use when calculating ECC bytes @@ -23,8 +23,7 @@ * @sm_order: Smart Media special ordering */ struct nand_ecc_sw_hamming_conf { - unsigned int reqooblen; - void *spare_oobbuf; + struct nand_ecc_req_tweak_ctx req_ctx; unsigned int code_size; unsigned int nsteps; u8 *calc_buf; @@ -34,6 +33,8 @@ struct nand_ecc_sw_hamming_conf { #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) +int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand); +void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand); int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order); int nand_ecc_sw_hamming_calculate(struct nand_device *nand, @@ -48,6 +49,13 @@ int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf, #else /* !CONFIG_MTD_NAND_ECC_SW_HAMMING */ +static inline int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand) +{ + return -ENOTSUPP; +} + +static inline void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand) {} + static inline int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index df8548187713..3616fa27eaa1 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,15 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) +struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); +#else +static inline struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void) +{ + return NULL; +} +#endif /* CONFIG_MTD_NAND_ECC_SW_HAMMING */ + #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void); #else -- cgit v1.2.3 From 53fbdeeb57a0168a88547e22f8d433810c531169 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:24 +0200 Subject: mtd: nand: Let software ECC engines be retrieved from the NAND core Before making use of the ECC engines, we must retrieve them. Add the boilerplate for the ones already available: software engines (Hamming and BCH). Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-21-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3616fa27eaa1..f78f61c9a9ee 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -277,6 +277,7 @@ int nand_ecc_prepare_io_req(struct nand_device *nand, int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); -- cgit v1.2.3 From 945845b54c9cf61809d1963492bb728ce8937964 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 17:41:07 +0200 Subject: mtd: spinand: Instantiate a SPI-NAND on-die ECC engine Make use of the existing functions taken from the SPI-NAND core to instantiate an on-die ECC engine specific to the SPI-NAND core. The next step will be to tweak the core to use this object instead of calling the helpers directly. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200930154109.3922-4-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 7b78c4ba9b3e..6bb92f26833e 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -286,6 +286,15 @@ struct spinand_ecc_info { #define SPINAND_HAS_QE_BIT BIT(0) #define SPINAND_HAS_CR_FEAT_BIT BIT(1) +/** + * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure + * @status: status of the last wait operation that will be used in case + * ->get_status() is not populated by the spinand device. + */ +struct spinand_ondie_ecc_conf { + u8 status; +}; + /** * struct spinand_info - Structure used to describe SPI NAND chips * @model: model name -- cgit v1.2.3 From da429b9615803b6f19e5734c4c4d99136e1e3bfd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 17:41:08 +0200 Subject: mtd: nand: Let on-die ECC engines be retrieved from the NAND core Before making use of the ECC engines, we must retrieve them. Add the necessary boilerplate. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200930154109.3922-5-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f78f61c9a9ee..6c6f91c03c42 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,7 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_get_on_die_hw_engine(struct nand_device *nand); #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); -- cgit v1.2.3 From 6b0c3b84156125e029956e46d2b44e72f513a9fa Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 1 Oct 2020 12:20:09 +0200 Subject: mtd: nand: Add helpers to manage ECC engines and configurations Add the logic in the NAND core to find the right ECC engine depending on the NAND chip requirements and the user desires. Right now, the choice may be made between (more will come): * software Hamming * software BCH * on-die (SPI-NAND devices only) Once the ECC engine has been found, the ECC engine must be configured. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201001102014.20100-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 6c6f91c03c42..414f8a4d2853 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -936,6 +936,10 @@ bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); int nanddev_erase(struct nand_device *nand, const struct nand_pos *pos); int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); +/* ECC related functions */ +int nanddev_ecc_engine_init(struct nand_device *nand); +void nanddev_ecc_engine_cleanup(struct nand_device *nand); + /* BBT related functions */ enum nand_bbt_block_status { NAND_BBT_BLOCK_STATUS_UNKNOWN, -- cgit v1.2.3 From 7998d89875177a5fac9f963e230dbb828c218cb9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:04 +0200 Subject: mtd: rawnand: fix a kernel-doc markup Some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/9ed47a57d12c40e73a9b01612ee119d39baa6236.1603469755.git.mchehab+huawei@kernel.org --- include/linux/mtd/rawnand.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 0a90a8792d18..6b3240e44310 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1284,7 +1284,8 @@ static inline bool nand_is_slc(struct nand_chip *chip) } /** - * Check if the opcode's address should be sent only on the lower 8 bits + * nand_opcode_8bits - Check if the opcode's address should be sent only on the + * lower 8 bits * @command: opcode to check */ static inline int nand_opcode_8bits(unsigned int command) -- cgit v1.2.3 From 0f6b791955a6365b5ebe8b6a5b01de69a47ee92e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 10 Nov 2020 09:19:08 -0300 Subject: mtd: rawnand: mxc: Remove platform data support i.MX is a devicetree-only platform now and the existing platform data support in this driver was only useful for old non-devicetree platforms. Get rid of the platform data support since it is no longer used. Signed-off-by: Fabio Estevam Reviewed-by: Sascha Hauer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201110121908.19400-1-festevam@gmail.com --- include/linux/platform_data/mtd-mxc_nand.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/platform_data/mtd-mxc_nand.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-mxc_nand.h b/include/linux/platform_data/mtd-mxc_nand.h deleted file mode 100644 index d1230030c6db..000000000000 --- a/include/linux/platform_data/mtd-mxc_nand.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved. - * Copyright 2008 Sascha Hauer, kernel@pengutronix.de - */ - -#ifndef __ASM_ARCH_NAND_H -#define __ASM_ARCH_NAND_H - -#include - -struct mxc_nand_platform_data { - unsigned int width; /* data bus width in bytes */ - unsigned int hw_ecc:1; /* 0 if suppress hardware ECC */ - unsigned int flash_bbt:1; /* set to 1 to use a flash based bbt */ - struct mtd_partition *parts; /* partition table */ - int nr_parts; /* size of parts */ -}; -#endif /* __ASM_ARCH_NAND_H */ -- cgit v1.2.3 From bdfae1c9a913930eae5ea506733aa7c285e12a06 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Dec 2020 09:44:44 +0800 Subject: vfio/type1: Add vfio_group_iommu_domain() Add the API for getting the domain from a vfio group. This could be used by the physical device drivers which rely on the vfio/mdev framework for mediated device user level access. The typical use case like below: unsigned int pasid; struct vfio_group *vfio_group; struct iommu_domain *iommu_domain; struct device *dev = mdev_dev(mdev); struct device *iommu_device = mdev_get_iommu_device(dev); if (!iommu_device || !iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) return -EINVAL; vfio_group = vfio_group_get_external_user_from_dev(dev); if (IS_ERR_OR_NULL(vfio_group)) return -EFAULT; iommu_domain = vfio_group_iommu_domain(vfio_group); if (IS_ERR_OR_NULL(iommu_domain)) { vfio_group_put_external_user(vfio_group); return -EFAULT; } pasid = iommu_aux_get_pasid(iommu_domain, iommu_device); if (pasid < 0) { vfio_group_put_external_user(vfio_group); return -EFAULT; } /* Program device context with pasid value. */ ... Signed-off-by: Lu Baolu Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 38d3c6a8dc7e..f45940b38a02 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -90,6 +90,8 @@ struct vfio_iommu_driver_ops { struct notifier_block *nb); int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, void *data, size_t count, bool write); + struct iommu_domain *(*group_iommu_domain)(void *iommu_data, + struct iommu_group *group); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -126,6 +128,8 @@ extern int vfio_group_unpin_pages(struct vfio_group *group, extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, void *data, size_t len, bool write); +extern struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group); + /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0, -- cgit v1.2.3 From 88149082bb8ef31b289673669e080ec6a00c2e59 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Tue, 8 Dec 2020 10:08:43 +0800 Subject: fs: Handle I_DONTCACHE in iput_final() instead of generic_drop_inode() If generic_drop_inode() returns true, it means iput_final() can evict this inode regardless of whether it is dirty or not. If we check I_DONTCACHE in generic_drop_inode(), any inode with this bit set will be evicted unconditionally. This is not the desired behavior because I_DONTCACHE only means the inode shouldn't be cached on the LRU list. As for whether we need to evict this inode, this is what generic_drop_inode() should do. This patch corrects the usage of I_DONTCACHE. This patch was proposed in [1]. [1]: https://lore.kernel.org/linux-fsdevel/20200831003407.GE12096@dread.disaster.area/ Fixes: dae2f8ed7992 ("fs: Lift XFS_IDONTCACHE to the VFS layer") Signed-off-by: Hao Li Reviewed-by: Dave Chinner Reviewed-by: Ira Weiny Signed-off-by: Al Viro --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..8bde32cf9711 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2878,8 +2878,7 @@ extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || inode_unhashed(inode) || - (inode->i_state & I_DONTCACHE); + return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); -- cgit v1.2.3 From c9e6189fb03123a7dfb93589280347b46f30b161 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:18 +0100 Subject: ntp: Make the RTC synchronization more reliable Miroslav reported that the periodic RTC synchronization in the NTP code fails more often than not to hit the specified update window. The reason is that the code uses delayed_work to schedule the update which needs to be in thread context as the underlying RTC might be connected via a slow bus, e.g. I2C. In the update function it verifies whether the current time is correct vs. the requirements of the underlying RTC. But delayed_work is using the timer wheel for scheduling which is inaccurate by design. Depending on the distance to the expiry the wheel gets less granular to allow batching and to avoid the cascading of the original timer wheel. See 500462a9de65 ("timers: Switch to a non-cascading wheel") and the code for further details. The code already deals with this by splitting the 660 seconds period into a long 659 seconds timer and then retrying with a smaller delta. But looking at the actual granularities of the timer wheel (which depend on the HZ configuration) the 659 seconds timer ends up in an outer wheel level and is affected by a worst case granularity of: HZ Granularity 1000 32s 250 16s 100 40s So the initial timer can be already off by max 12.5% which is not a big issue as the period of the sync is defined as ~11 minutes. The fine grained second attempt schedules to the desired update point with a timer expiring less than a second from now. Depending on the actual delta and the HZ setting even the second attempt can end up in outer wheel levels which have a large enough granularity to make the correctness check fail. As this is a fundamental property of the timer wheel there is no way to make this more accurate short of iterating in one jiffies steps towards the update point. Switch it to an hrtimer instead which schedules the actual update work. The hrtimer will expire precisely (max 1 jiffie delay when high resolution timers are not available). The actual scheduling delay of the work is the same as before. The update is triggered from do_adjtimex() which is a bit racy but not much more racy than it was before: if (ntp_synced()) queue_delayed_work(system_power_efficient_wq, &sync_work, 0); which is racy when the work is currently executed and has not managed to reschedule itself. This becomes now: if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer)) queue_work(system_power_efficient_wq, &sync_work, 0); which is racy when the hrtimer has expired and the work is currently executed and has not yet managed to rearm the hrtimer. Not a big problem as it just schedules work for nothing. The new implementation has a safe guard in place to catch the case where the hrtimer is queued on entry to the work function and avoids an extra update attempt of the RTC that way. Reported-by: Miroslav Lichvar Signed-off-by: Thomas Gleixner Tested-by: Miroslav Lichvar Reviewed-by: Jason Gunthorpe Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220542.062910520@linutronix.de --- include/linux/timex.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index ce0859763670..9c2e54faf9b7 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -157,7 +157,6 @@ extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); -void ntp_notify_cmos_timer(void); /* The clock frequency of the i8253/i8254 PIT */ #define PIT_TICK_RATE 1193182ul -- cgit v1.2.3 From 33e62e832384c8cb523044e0e9d99d7133f98e93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:19 +0100 Subject: ntp, rtc: Move rtc_set_ntp_time() to ntp code rtc_set_ntp_time() is not really RTC functionality as the code is just a user of RTC. Move it into the NTP code which allows further cleanups. Requested-by: Alexandre Belloni Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220542.166871172@linutronix.de --- include/linux/rtc.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 22d1575e4991..ff62680b48ca 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -165,7 +165,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); @@ -205,39 +204,6 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } -/* Determine if we can call to driver to set the time. Drivers can only be - * called to set a second aligned time value, and the field set_offset_nsec - * specifies how far away from the second aligned time to call the driver. - * - * This also computes 'to_set' which is the time we are trying to set, and has - * a zero in tv_nsecs, such that: - * to_set - set_delay_nsec == now +/- FUZZ - * - */ -static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, - struct timespec64 *to_set, - const struct timespec64 *now) -{ - /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */ - const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5; - struct timespec64 delay = {.tv_sec = 0, - .tv_nsec = set_offset_nsec}; - - *to_set = timespec64_add(*now, delay); - - if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) { - to_set->tv_nsec = 0; - return true; - } - - if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) { - to_set->tv_sec++; - to_set->tv_nsec = 0; - return true; - } - return false; -} - #define rtc_register_device(device) \ __rtc_register_device(THIS_MODULE, device) -- cgit v1.2.3 From 69eca258c85000564577642ba28335eb4e1df8f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:20 +0100 Subject: ntp: Make the RTC sync offset less obscure The current RTC set_offset_nsec value is not really intuitive to understand. tsched twrite(t2.tv_sec - 1) t2 (seconds increment) The offset is calculated from twrite based on the assumption that t2 - twrite == 1s. That means for the MC146818 RTC the offset needs to be negative so that the write happens 500ms before t2. It's easier to understand when the whole calculation is based on t2. That avoids negative offsets and the meaning is obvious: t2 - twrite: The time defined by the chip when seconds increment after the write. twrite - tsched: The time for the transport to the point where the chip is updated. ==> set_offset_nsec = t2 - tsched ttransport = twrite - tsched tRTCinc = t2 - twrite ==> set_offset_nsec = ttransport + tRTCinc tRTCinc is a chip property and can be obtained from the data sheet. ttransport depends on how the RTC is connected. It is close to 0 for directly accessible RTCs. For RTCs behind a slow bus, e.g. i2c, it's the time required to send the update over the bus. This can be estimated or even calibrated, but that's a different problem. Adjust the implementation and update comments accordingly. Signed-off-by: Thomas Gleixner Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220542.263204937@linutronix.de --- include/linux/rtc.h | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index ff62680b48ca..b829382de6c3 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -110,13 +110,36 @@ struct rtc_device { /* Some hardware can't support UIE mode */ int uie_unsupported; - /* Number of nsec it takes to set the RTC clock. This influences when - * the set ops are called. An offset: - * - of 0.5 s will call RTC set for wall clock time 10.0 s at 9.5 s - * - of 1.5 s will call RTC set for wall clock time 10.0 s at 8.5 s - * - of -0.5 s will call RTC set for wall clock time 10.0 s at 10.5 s + /* + * This offset specifies the update timing of the RTC. + * + * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds + * + * The offset defines how tsched is computed so that the write to + * the RTC (t2.tv_sec - 1sec) is correct versus the time required + * for the transport of the write and the time which the RTC needs + * to increment seconds the first time after the write (t2). + * + * For direct accessible RTCs tsched ~= t1 because the write time + * is negligible. For RTCs behind slow busses the transport time is + * significant and has to be taken into account. + * + * The time between the write (t1) and the first increment after + * the write (t2) is RTC specific. For a MC146818 RTC it's 500ms, + * for many others it's exactly 1 second. Consult the datasheet. + * + * The value of this offset is also used to calculate the to be + * written value (t2.tv_sec - 1sec) at tsched. + * + * The default value for this is NSEC_PER_SEC + 10 msec default + * transport time. The offset can be adjusted by drivers so the + * calculation for the to be written value at tsched becomes + * correct: + * + * newval = tsched + set_offset_nsec - NSEC_PER_SEC + * and (tsched + set_offset_nsec) % NSEC_PER_SEC == 0 */ - long set_offset_nsec; + unsigned long set_offset_nsec; bool registered; -- cgit v1.2.3 From fe79d5de77204dd946cfad76a9bec23354b1a500 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Thu, 10 Dec 2020 17:05:20 +0100 Subject: USB: typec: tcpm: Add a 30ms room for tPSSourceOn in PR_SWAP TCPM state machine needs 20-25ms to enter the ErrorRecovery state after tPSSourceOn timer timeouts. Change the timer from max 480ms to 450ms to ensure that the timer complies with the Spec. In order to keep the flexibility for other usecases using tPSSourceOn, add another timer only for PR_SWAP. Cc: Guenter Roeck Cc: Heikki Krogerus Cc: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Signed-off-by: Will McVicker Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201210160521.3417426-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 63a66dd5d832..bb9a782e1411 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -466,6 +466,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_DRP_SRC 30 #define PD_T_PS_SOURCE_OFF 920 #define PD_T_PS_SOURCE_ON 480 +#define PD_T_PS_SOURCE_ON_PRS 450 /* 390 - 480ms */ #define PD_T_PS_HARD_RESET 30 #define PD_T_SRC_RECOVER 760 #define PD_T_SRC_RECOVER_MAX 1000 -- cgit v1.2.3 From fecc4559780d52d174ea05e3bf543669165389c3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 2 Dec 2020 14:07:09 +0200 Subject: fsnotify: fix events reported to watching parent and child fsnotify_parent() used to send two separate events to backends when a parent inode is watching children and the child inode is also watching. In an attempt to avoid duplicate events in fanotify, we unified the two backend callbacks to a single callback and handled the reporting of the two separate events for the relevant backends (inotify and dnotify). However the handling is buggy and can result in inotify and dnotify listeners receiving events of the type they never asked for or spurious events. The problem is the unified event callback with two inode marks (parent and child) is called when any of the parent and child inodes are watched and interested in the event, but the parent inode's mark that is interested in the event on the child is not necessarily the one we are currently reporting to (it could belong to a different group). So before reporting the parent or child event flavor to backend we need to check that the mark is really interested in that event flavor. The semantics of INODE and CHILD marks were hard to follow and made the logic more complicated than it should have been. Replace it with INODE and PARENT marks semantics to hopefully make the logic more clear. Thanks to Hugh Dickins for spotting a bug in the earlier version of this patch. Fixes: 497b0c5a7c06 ("fsnotify: send event to parent and child with single callback") CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201202120713.702387-4-amir73il@gmail.com Reported-by: Hugh Dickins Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 4ee3044eedd0..a2e42d3cd87c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -278,7 +278,7 @@ static inline const struct path *fsnotify_data_path(const void *data, enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_CHILD, + FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, @@ -286,7 +286,7 @@ enum fsnotify_obj_type { }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD) +#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) #define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) @@ -331,7 +331,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ } FSNOTIFY_ITER_FUNCS(inode, INODE) -FSNOTIFY_ITER_FUNCS(child, CHILD) +FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -- cgit v1.2.3 From 14486c82612a177cb910980c70ba900827ca0894 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 4 Nov 2020 15:46:41 +0200 Subject: rfkill: add a reason to the HW rfkill state The WLAN device may exist yet not be usable. This can happen when the WLAN device is controllable by both the host and some platform internal component. We need some arbritration that is vendor specific, but when the device is not available for the host, we need to reflect this state towards the user space. Add a reason field to the rfkill object (and event) so that userspace can know why the device is in rfkill: because some other platform component currently owns the device, or because the actual hw rfkill signal is asserted. Capable userspace can now determine the reason for the rfkill and possibly do some negotiation on a side band channel using a proprietary protocol to gain ownership on the device in case the device is owned by some other component. When the host gains ownership on the device, the kernel can remove the RFKILL_HARD_BLOCK_NOT_OWNER reason and the hw rfkill state will be off. Then, the userspace can bring the device up and start normal operation. The rfkill_event structure is enlarged to include the additional byte, it is now 9 bytes long. Old user space will ask to read only 8 bytes so that the kernel can know not to feed them with more data. When the user space writes 8 bytes, new kernels will just read what is present in the file descriptor. This new byte is read only from the userspace standpoint anyway. If a new user space uses an old kernel, it'll ask to read 9 bytes but will get only 8, and it'll know that it didn't get the new state. When it'll write 9 bytes, the kernel will again ignore this new byte which is read only from the userspace standpoint. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20201104134641.28816-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 8ad2487a86d5..231e06b74b50 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -137,6 +137,17 @@ void rfkill_unregister(struct rfkill *rfkill); */ void rfkill_destroy(struct rfkill *rfkill); +/** + * rfkill_set_hw_state_reason - Set the internal rfkill hardware block state + * with a reason + * @rfkill: pointer to the rfkill class to modify. + * @blocked: the current hardware block state to set + * @reason: one of &enum rfkill_hard_block_reasons + * + * Prefer to use rfkill_set_hw_state if you don't need any special reason. + */ +bool rfkill_set_hw_state_reason(struct rfkill *rfkill, + bool blocked, unsigned long reason); /** * rfkill_set_hw_state - Set the internal rfkill hardware block state * @rfkill: pointer to the rfkill class to modify. @@ -156,7 +167,11 @@ void rfkill_destroy(struct rfkill *rfkill); * should be blocked) so that drivers need not keep track of the soft * block state -- which they might not be able to. */ -bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); +static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) +{ + return rfkill_set_hw_state_reason(rfkill, blocked, + RFKILL_HARD_BLOCK_SIGNAL); +} /** * rfkill_set_sw_state - Set the internal rfkill software block state @@ -256,6 +271,13 @@ static inline void rfkill_destroy(struct rfkill *rfkill) { } +static inline bool rfkill_set_hw_state_reason(struct rfkill *rfkill, + bool blocked, + unsigned long reason) +{ + return blocked; +} + static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) { return blocked; -- cgit v1.2.3 From d6587602c59974a2eda35e8ed70a4f5970380be8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 29 Nov 2020 17:30:46 +0200 Subject: cfg80211: Parse SAE H2E only membership selector This extends the support for drivers that rebuild IEs in the FW (same as with HT/VHT/HE). Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20201129172929.4012647275f3.I1a93ae71c57ef0b6f58f99d47fce919d19d65ff0@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5e8cc9c3d45a..cbd294239430 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1261,6 +1261,7 @@ struct ieee80211_mgmt { #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 #define BSS_MEMBERSHIP_SELECTOR_HE_PHY 122 +#define BSS_MEMBERSHIP_SELECTOR_SAE_H2E 123 /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) -- cgit v1.2.3 From 9850742470804b2cc6a6543bd8f5822eeb5fdbc0 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 29 Nov 2020 17:30:52 +0200 Subject: ieee80211: update reduced neighbor report TBTT info length A new field (20MHz PSD - 1 byte) was added to the RNR TBTT info field. Adjust the expected TBTT info length accordingly. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20201129172929.b503adccce6a.Ie684e1d3039c111bf2d521bf762aaec3f7a24d2e@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cbd294239430..72ff75fb1971 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3836,15 +3836,15 @@ static inline bool for_each_element_completed(const struct element *element, #define WLAN_RSNX_CAPA_SAE_H2E BIT(5) /* - * reduced neighbor report, based on Draft P802.11ax_D5.0, - * section 9.4.2.170 + * reduced neighbor report, based on Draft P802.11ax_D6.1, + * section 9.4.2.170 and accepted contributions. */ #define IEEE80211_AP_INFO_TBTT_HDR_TYPE 0x03 #define IEEE80211_AP_INFO_TBTT_HDR_FILTERED 0x04 #define IEEE80211_AP_INFO_TBTT_HDR_COLOC 0x08 #define IEEE80211_AP_INFO_TBTT_HDR_COUNT 0xF0 -#define IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM 8 -#define IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM 12 +#define IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM 9 +#define IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM 13 #define IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED 0x01 #define IEEE80211_RNR_TBTT_PARAMS_SAME_SSID 0x02 -- cgit v1.2.3 From 84e0d87c9944eb36ae6037af5cb6905f67c074c5 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 10 Dec 2020 14:30:12 +0000 Subject: thermal: devfreq_cooling: add new registration functions with Energy Model The Energy Model (EM) framework supports devices such as Devfreq. Create new registration function which automatically register EM for the thermal devfreq_cooling devices. This patch prepares the code for coming changes which are going to replace old power model with the new EM. Reviewed-by: Ionela Voinescu Signed-off-by: Lukasz Luba Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20201210143014.24685-4-lukasz.luba@arm.com --- include/linux/devfreq_cooling.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 9df2dfca68dd..7a9fbcc7b265 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -65,6 +65,9 @@ struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df); void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); +struct thermal_cooling_device * +devfreq_cooling_em_register(struct devfreq *df, + struct devfreq_cooling_power *dfc_power); #else /* !CONFIG_DEVFREQ_THERMAL */ @@ -87,6 +90,13 @@ devfreq_cooling_register(struct devfreq *df) return ERR_PTR(-EINVAL); } +static inline struct thermal_cooling_device * +devfreq_cooling_em_register(struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + return ERR_PTR(-EINVAL); +} + static inline void devfreq_cooling_unregister(struct thermal_cooling_device *dfc) { -- cgit v1.2.3 From 615510fe13bd2434610193f1acab53027d5146d6 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 10 Dec 2020 14:30:13 +0000 Subject: thermal: devfreq_cooling: remove old power model and use EM Remove old power model and use new Energy Model to calculate the power budget. It drops static + dynamic power calculations and power table in order to use Energy Model performance domain data. This model should be easy to use and could find more users. It is also less complicated to setup the needed structures. Reviewed-by: Ionela Voinescu Signed-off-by: Lukasz Luba Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20201210143014.24685-5-lukasz.luba@arm.com --- include/linux/devfreq_cooling.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 7a9fbcc7b265..14baa73fc2de 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -16,17 +16,6 @@ /** * struct devfreq_cooling_power - Devfreq cooling power ops - * @get_static_power: Take voltage, in mV, and return the static power - * in mW. If NULL, the static power is assumed - * to be 0. - * @get_dynamic_power: Take voltage, in mV, and frequency, in HZ, and - * return the dynamic power draw in mW. If NULL, - * a simple power model is used. - * @dyn_power_coeff: Coefficient for the simple dynamic power model in - * mW/(MHz mV mV). - * If get_dynamic_power() is NULL, then the - * dynamic power is calculated as - * @dyn_power_coeff * frequency * voltage^2 * @get_real_power: When this is set, the framework uses it to ask the * device driver for the actual power. * Some devices have more sophisticated methods @@ -46,14 +35,8 @@ * max total (static + dynamic) power value for each OPP. */ struct devfreq_cooling_power { - unsigned long (*get_static_power)(struct devfreq *devfreq, - unsigned long voltage); - unsigned long (*get_dynamic_power)(struct devfreq *devfreq, - unsigned long freq, - unsigned long voltage); int (*get_real_power)(struct devfreq *df, u32 *power, unsigned long freq, unsigned long voltage); - unsigned long dyn_power_coeff; }; #ifdef CONFIG_DEVFREQ_THERMAL -- cgit v1.2.3 From d7203eedf4f68e9909fd489453168a9d26bf0c3d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Dec 2020 13:15:11 +0100 Subject: thermal/core: Add critical and hot ops Currently there is no way to the sensors to directly call an ops in interrupt mode without calling thermal_zone_device_update assuming all the trip points are defined. A sensor may want to do something special if a trip point is hot or critical. This patch adds the critical and hot ops to the thermal zone device, so a sensor can directly invoke them or let the thermal framework to call the sensor specific ones. Tested-by: Kai-Heng Feng Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20201210121514.25760-2-daniel.lezcano@linaro.org --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d07ea27e72a9..31b84404f047 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -79,6 +79,8 @@ struct thermal_zone_device_ops { enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, enum thermal_trip_type); + void (*hot)(struct thermal_zone_device *); + void (*critical)(struct thermal_zone_device *); }; struct thermal_cooling_device_ops { @@ -399,6 +401,7 @@ void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); +void thermal_zone_device_critical(struct thermal_zone_device *tz); #else static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, -- cgit v1.2.3 From b388fa50142510fb6477f130bb1b3f05a0a263a1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 9 Nov 2020 09:41:21 +0000 Subject: Revert "genirq: Add fasteoi IPI flow" handle_percpu_devid_fasteoi_ipi() has no more users, and handle_percpu_devid_irq() can do all that it was supposed to do. Get rid of it. This reverts commit c5e5ec033c4ab25c53f1fd217849e75deb0bf7bf. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201109094121.29975-6-valentin.schneider@arm.com --- include/linux/irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c54365309e97..ca26bec51cec 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -647,7 +647,6 @@ static inline int irq_set_parent(int irq, int parent_irq) */ extern void handle_level_irq(struct irq_desc *desc); extern void handle_fasteoi_irq(struct irq_desc *desc); -extern void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc); extern void handle_edge_irq(struct irq_desc *desc); extern void handle_edge_eoi_irq(struct irq_desc *desc); extern void handle_simple_irq(struct irq_desc *desc); -- cgit v1.2.3 From 1d3aec89286254487df7641c30f1b14ad1d127a5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:53 +0800 Subject: genirq/affinity: Add irq_update_affinity_desc() Add a function to allow the affinity of an interrupt be switched to managed, such that interrupts allocated for platform devices may be managed. This new interface has certain limitations, and attempts to use it in the following circumstances will fail: - For when the kernel is configured for generic IRQ reservation mode (in config GENERIC_IRQ_RESERVATION_MODE). The reason being that it could conflict with managed vs. non-managed interrupt accounting. - The interrupt is already started, which should not be the case during init - The interrupt is already configured as managed, which means double init Suggested-by: Thomas Gleixner Signed-off-by: John Garry Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1606905417-183214-2-git-send-email-john.garry@huawei.com --- include/linux/interrupt.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ee8299eb1f52..870b3251e174 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -352,6 +352,8 @@ extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); +extern int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); @@ -387,6 +389,12 @@ static inline int irq_set_affinity_hint(unsigned int irq, return -EINVAL; } +static inline int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity) +{ + return -EINVAL; +} + static inline int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { -- cgit v1.2.3 From 9806731db684a475ade1e95d166089b9edbd9da3 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:54 +0800 Subject: resource: Add irqresource_disabled() Add a common function to set the fields for a irq resource to disabled, which mimics what is done in acpi_dev_irqresource_disabled(), with a view to replace that function. Signed-off-by: John Garry Signed-off-by: Marc Zyngier Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/1606905417-183214-3-git-send-email-john.garry@huawei.com --- include/linux/ioport.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5135d4b86cd6..f9bf374f9633 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -307,6 +307,13 @@ struct resource *devm_request_free_mem_region(struct device *dev, struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); +static inline void irqresource_disabled(struct resource *res, u32 irq) +{ + res->start = irq; + res->end = irq; + res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET; +} + #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res); #else -- cgit v1.2.3 From e15f2fa959f2cce8a05e8e3a596e75d068cd42c5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:56 +0800 Subject: driver core: platform: Add devm_platform_get_irqs_affinity() Drivers for multi-queue platform devices may also want managed interrupts for handling HW queue completion interrupts, so add support. The function accepts an affinity descriptor pointer, which covers all IRQs expected for the device. The function is devm class as the only current in-tree user will also use devm method for requesting the interrupts; as such, the function is made as devm as it can ensure ordering of freeing the irq and disposing of the mapping. Signed-off-by: John Garry Signed-off-by: Marc Zyngier Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/1606905417-183214-5-git-send-email-john.garry@huawei.com --- include/linux/platform_device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 77a2aada106d..4d75633e6735 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -15,6 +15,7 @@ #define PLATFORM_DEVID_NONE (-1) #define PLATFORM_DEVID_AUTO (-2) +struct irq_affinity; struct mfd_cell; struct property_entry; struct platform_device_id; @@ -70,6 +71,11 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev, extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); extern int platform_irq_count(struct platform_device *); +extern int devm_platform_get_irqs_affinity(struct platform_device *dev, + struct irq_affinity *affd, + unsigned int minvec, + unsigned int maxvec, + int **irqs); extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, const char *); -- cgit v1.2.3 From 426506a7e0f1902268c3edbdc7e5475624a9d18b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:24 +0200 Subject: dmaengine: ti: k3-udma-glue: Add function to get device pointer for DMA API Glue layer users should use the device of the DMA for DMA mapping and allocations as it is the DMA which accesses to descriptors and buffers, not the clients Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Link: https://lore.kernel.org/r/20201208090440.31792-5-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-udma-glue.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 5eb34ad973a7..d7c12f31377c 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -41,6 +41,8 @@ void k3_udma_glue_reset_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, u32 k3_udma_glue_tx_get_hdesc_size(struct k3_udma_glue_tx_channel *tx_chn); u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn); +struct device * + k3_udma_glue_tx_get_dma_device(struct k3_udma_glue_tx_channel *tx_chn); enum { K3_UDMA_GLUE_SRC_TAG_LO_KEEP = 0, @@ -130,5 +132,7 @@ int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); +struct device * + k3_udma_glue_rx_get_dma_device(struct k3_udma_glue_rx_channel *rx_chn); #endif /* K3_UDMA_GLUE_H_ */ -- cgit v1.2.3 From 4f910c035f38053ac8eb63a672c78862c535cd0f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:27 +0200 Subject: dmaengine: of-dma: Add support for optional router configuration callback Additional configuration for the DMA event router might be needed for a channel which can not be done during device_alloc_chan_resources callback since the router information is not yet present for the drivers. If there is a need for additional configuration for the channel if DMA router is in use, then the driver can implement the device_router_config callback. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-8-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 493a047ed0a2..aed44888cad3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -805,6 +805,7 @@ struct dma_filter { * by tx_status * @device_alloc_chan_resources: allocate resources and return the * number of allocated descriptors + * @device_router_config: optional callback for DMA router configuration * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation @@ -879,6 +880,7 @@ struct dma_device { enum dma_residue_granularity residue_granularity; int (*device_alloc_chan_resources)(struct dma_chan *chan); + int (*device_router_config)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( -- cgit v1.2.3 From ab650ef6d548153862119e1bf3bf267510707f48 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:28 +0200 Subject: dmaengine: Add support for per channel coherency handling If the DMA device supports per channel coherency configuration (a channel can be configured to have coherent or not coherent view) then a single device (the DMA controller's device) can not be used for dma_api for all channels as channels can have different coherency. Introduce custom_dma_mapping flag for the dma_chan and a new helper to get the device pointer to be used for dma_api for the given channel. Client drivers should be updated to be able to support per channel coherency by: - dma_map_single(chan->device->dev, ptr, size, DMA_TO_DEVICE); + struct device *dma_dev = dmaengine_get_dma_device(chan); + + dma_map_single(dma_dev, ptr, size, DMA_TO_DEVICE); Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-9-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index aed44888cad3..68130f5f599e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -357,11 +357,14 @@ struct dma_chan { * @chan: driver channel device * @device: sysfs device * @dev_id: parent dma_device dev_id + * @chan_dma_dev: The channel is using custom/different dma-mapping + * compared to the parent dma_device */ struct dma_chan_dev { struct dma_chan *chan; struct device device; int dev_id; + bool chan_dma_dev; }; /** @@ -1618,4 +1621,13 @@ dmaengine_get_direction_text(enum dma_transfer_direction dir) return "invalid"; } } + +static inline struct device *dmaengine_get_dma_device(struct dma_chan *chan) +{ + if (chan->dev->chan_dma_dev) + return &chan->dev->device; + + return chan->device->dev; +} + #endif /* DMAENGINE_H */ -- cgit v1.2.3 From b9366e2577a38ca5322f326cff9752c2008597c6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:33 +0200 Subject: dmaengine: ti: k3-psil: Extend psil_endpoint_config for K3 PKTDMA Additional fields needed for K3 PKTDMA to be able to handle the mapped channels (channels are locked to handle specific threads) and flow ranges for these mapped threads. PKTDMA also introduces tflow for tx channels which can not be found in K3 UDMA architecture. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Link: https://lore.kernel.org/r/20201208090440.31792-14-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-psil.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/k3-psil.h b/include/linux/dma/k3-psil.h index 1962f75fa2d3..36e22c5a0f29 100644 --- a/include/linux/dma/k3-psil.h +++ b/include/linux/dma/k3-psil.h @@ -50,6 +50,15 @@ enum psil_endpoint_type { * @channel_tpl: Desired throughput level for the channel * @pdma_acc32: ACC32 must be enabled on the PDMA side * @pdma_burst: BURST must be enabled on the PDMA side + * @mapped_channel_id: PKTDMA thread to channel mapping for mapped channels. + * The thread must be serviced by the specified channel if + * mapped_channel_id is >= 0 in case of PKTDMA + * @flow_start: PKDMA flow range start of mapped channel. Unmapped + * channels use flow_id == chan_id + * @flow_num: PKDMA flow count of mapped channel. Unmapped channels + * use flow_id == chan_id + * @default_flow_id: PKDMA default (r)flow index of mapped channel. + * Must be within the flow range of the mapped channel. */ struct psil_endpoint_config { enum psil_endpoint_type ep_type; @@ -63,6 +72,13 @@ struct psil_endpoint_config { /* PDMA properties, valid for PSIL_EP_PDMA_* */ unsigned pdma_acc32:1; unsigned pdma_burst:1; + + /* PKDMA mapped channel */ + int mapped_channel_id; + /* PKTDMA tflow and rflow ranges for mapped channel */ + u16 flow_start; + u16 flow_num; + u16 default_flow_id; }; int psil_set_new_ep_config(struct device *dev, const char *name, -- cgit v1.2.3 From fc373e47d72605cc3f5012ddda49d2dca430d51f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:35 +0200 Subject: dmaengine: ti: Add support for k3 event routers In k3 architecture a DMA channel (in TR momde) can be triggered by global events, origination from different modules. The events for triggers can be sent from any module which is connected to PSI-L fabric, but the event number to be sent is DMA channel specific, it is only known after the channel itself is requested. The router operation needs to be split up: - route_allocate: configure the dma_spec for the DMA and store the configuration which is needed for the router's input - set_event: callback used by the DMA driver to set the event number for the channel and enable the routing Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-16-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-event-router.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/dma/k3-event-router.h (limited to 'include/linux') diff --git a/include/linux/dma/k3-event-router.h b/include/linux/dma/k3-event-router.h new file mode 100644 index 000000000000..e3f88b2f87be --- /dev/null +++ b/include/linux/dma/k3-event-router.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com + */ + +#ifndef K3_EVENT_ROUTER_ +#define K3_EVENT_ROUTER_ + +#include + +struct k3_event_route_data { + void *priv; + int (*set_event)(void *priv, u32 event); +}; + +#endif /* K3_EVENT_ROUTER_ */ -- cgit v1.2.3 From d782298c6f6b854452965b56d91616dfb60490c5 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 8 Dec 2020 11:04:36 +0200 Subject: soc: ti: k3-ringacc: add AM64 DMA rings support. The DMAs in AM64 have built in rings compared to AM654/J721e/J7200 where a separate and generic ringacc is used. The ring SW interface is similar to ringacc with some major architectural differences, like They are part of the DMA (BCDMA or PKTDMA). They are dual mode rings are modeled as pair of Rings objects which has common configuration and memory buffer, but separate real-time control register sets for each direction mem2dev (forward) and dev2mem (reverse). The ringacc driver must be initialized for DMA rings use with k3_ringacc_dmarings_init() as it is not an independent device as ringacc is. AM64 rings must be requested only using k3_ringacc_request_rings_pair(), and forward ring must always be initialized/configured. After this any other Ringacc APIs can be used without any callers changes. Signed-off-by: Grygorii Strashko Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-17-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/soc/ti/k3-ringacc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 658dc71d2901..39b022b92598 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -70,6 +70,7 @@ struct k3_ring; * @dma_dev: Master device which is using and accessing to the ring * memory when the mode is K3_RINGACC_RING_MODE_RING. Memory allocations * should be done using this device. + * @asel: Address Space Select value for physical addresses */ struct k3_ring_cfg { u32 size; @@ -79,6 +80,7 @@ struct k3_ring_cfg { u32 flags; struct device *dma_dev; + u32 asel; }; #define K3_RINGACC_RING_ID_ANY (-1) @@ -250,4 +252,19 @@ int k3_ringacc_ring_pop_tail(struct k3_ring *ring, void *elem); u32 k3_ringacc_get_tisci_dev_id(struct k3_ring *ring); +/* DMA ring support */ +struct ti_sci_handle; + +/** + * struct struct k3_ringacc_init_data - Initialization data for DMA rings + */ +struct k3_ringacc_init_data { + const struct ti_sci_handle *tisci; + u32 tisci_dev_id; + u32 num_rings; +}; + +struct k3_ringacc *k3_ringacc_dmarings_init(struct platform_device *pdev, + struct k3_ringacc_init_data *data); + #endif /* __SOC_TI_K3_RINGACC_API_H_ */ -- cgit v1.2.3 From 5b65781d06ea90ef2f8e51a13352c43c3daa8cdc Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Tue, 8 Dec 2020 11:04:40 +0200 Subject: dmaengine: ti: k3-udma-glue: Add support for K3 PKTDMA This commit adds support for PKTDMA in k3-udma glue driver. Use new psil_endpoint_config struct to get static data for a given channel or a flow during setup. Make sure that the RX flows being mapped to a RX channel is within the range of flows that is been allocated to that RX channel. Signed-off-by: Vignesh Raghavendra Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-21-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-udma-glue.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index d7c12f31377c..e443be4d3b4b 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -43,6 +43,10 @@ u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn); struct device * k3_udma_glue_tx_get_dma_device(struct k3_udma_glue_tx_channel *tx_chn); +void k3_udma_glue_tx_dma_to_cppi5_addr(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *addr); +void k3_udma_glue_tx_cppi5_to_dma_addr(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *addr); enum { K3_UDMA_GLUE_SRC_TAG_LO_KEEP = 0, @@ -134,5 +138,9 @@ int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); struct device * k3_udma_glue_rx_get_dma_device(struct k3_udma_glue_rx_channel *rx_chn); +void k3_udma_glue_rx_dma_to_cppi5_addr(struct k3_udma_glue_rx_channel *rx_chn, + dma_addr_t *addr); +void k3_udma_glue_rx_cppi5_to_dma_addr(struct k3_udma_glue_rx_channel *rx_chn, + dma_addr_t *addr); #endif /* K3_UDMA_GLUE_H_ */ -- cgit v1.2.3 From e998879d4fb7991856916972168cf27c0d86ed12 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Thu, 10 Dec 2020 01:25:15 +0000 Subject: x86,swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests For SEV, all DMA to and from guest has to use shared (un-encrypted) pages. SEV uses SWIOTLB to make this happen without requiring changes to device drivers. However, depending on the workload being run, the default 64MB of it might not be enough and it may run out of buffers to use for DMA, resulting in I/O errors and/or performance degradation for high I/O workloads. Adjust the default size of SWIOTLB for SEV guests using a percentage of the total memory available to guest for the SWIOTLB buffers. Adds a new sev_setup_arch() function which is invoked from setup_arch() and it calls into a new swiotlb generic code function swiotlb_adjust_size() to do the SWIOTLB buffer adjustment. v5 fixed build errors and warnings as Reported-by: kbuild test robot Signed-off-by: Ashish Kalra Co-developed-by: Borislav Petkov Signed-off-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index fbdc65782195..d9c9fc9ca5d2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -30,6 +30,9 @@ enum swiotlb_force { */ #define IO_TLB_SHIFT 11 +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) + extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); @@ -78,6 +81,7 @@ void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(void); +void __init swiotlb_adjust_size(unsigned long new_size); #else #define swiotlb_force SWIOTLB_NO_FORCE static inline bool is_swiotlb_buffer(phys_addr_t paddr) @@ -100,6 +104,10 @@ static inline bool is_swiotlb_active(void) { return false; } + +static inline void swiotlb_adjust_size(unsigned long new_size) +{ +} #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); -- cgit v1.2.3 From 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:38 -0800 Subject: kbuild: avoid static_assert for genksyms genksyms does not know or care about the _Static_assert() built-in, and sometimes falls back to ignoring the later symbols, which causes undefined behavior such as WARNING: modpost: EXPORT symbol "ethtool_set_ethtool_phy_ops" [vmlinux] version generation failed, symbol will not be versioned. ld: net/ethtool/common.o: relocation R_AARCH64_ABS32 against `__crc_ethtool_set_ethtool_phy_ops' can not be used when making a shared object net/ethtool/common.o:(_ftrace_annotated_branch+0x0): dangerous relocation: unsupported relocation Redefine static_assert for genksyms to avoid that. Link: https://lkml.kernel.org/r/20201203230955.1482058-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Suggested-by: Ard Biesheuvel Cc: Masahiro Yamada Cc: Michal Marek Cc: Kees Cook Cc: Rikard Falkeborn Cc: Marco Elver Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index e3a0be2c90ad..7bb66e15b481 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,4 +77,9 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) +#ifdef __GENKSYMS__ +/* genksyms gets confused by _Static_assert */ +#define _Static_assert(expr, ...) +#endif + #endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.2.3 From 6e7b64b9dd6d96537d816ea07ec26b7dedd397b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:46 -0800 Subject: elfcore: fix building with clang kernel/elfcore.c only contains weak symbols, which triggers a bug with clang in combination with recordmcount: Cannot find symbol for section 2: .text. kernel/elfcore.o: failed Move the empty stubs into linux/elfcore.h as inline functions. As only two architectures use these, just use the architecture specific Kconfig symbols to key off the declaration. Link: https://lkml.kernel.org/r/20201204165742.3815221-2-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Barret Rhoden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfcore.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 46c3d691f677..de51c1bef27d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -104,6 +104,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } +#if defined(CONFIG_UM) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -118,5 +119,26 @@ elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); extern size_t elf_core_extra_data_size(void); +#else +static inline Elf_Half elf_core_extra_phdrs(void) +{ + return 0; +} + +static inline int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + return 1; +} + +static inline int elf_core_write_extra_data(struct coredump_params *cprm) +{ + return 1; +} + +static inline size_t elf_core_extra_data_size(void) +{ + return 0; +} +#endif #endif /* _LINUX_ELFCORE_H */ -- cgit v1.2.3 From 2ab695aa8eb8f3226f68a2b91fc6103b56fcb57d Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 11 Dec 2020 12:26:29 -0800 Subject: ACPI: Use fwnode_init() to set up fwnode Commit 01bb86b380a3 ("driver core: Add fwnode_init()") was supposed to fix up all instances of fwnode creation to use fwnode_init(). But looks like this instance was missed. This causes a NULL pointer dereference during device_add() [1]. So, fix it. [ 60.792324][ T1] Call trace: [ 60.795495][ T1] device_add+0xf60/0x16b0 __fw_devlink_link_to_consumers at drivers/base/core.c:1583 (inlined by) fw_devlink_link_device at drivers/base/core.c:1726 (inlined by) device_add at drivers/base/core.c:3088 [ 60.799813][ T1] platform_device_add+0x274/0x628 [ 60.804833][ T1] acpi_iort_init+0x9d8/0xc50 [ 60.809415][ T1] acpi_init+0x45c/0x4e8 [ 60.813556][ T1] do_one_initcall+0x170/0xb70 [ 60.818224][ T1] kernel_init_freeable+0x6a8/0x734 [ 60.823332][ T1] kernel_init+0x18/0x12c [ 60.827566][ T1] ret_from_fork+0x10/0x1c [ 60.838756][ T1] ---[ end trace fa5c8ce17a226d83 ]--- [1] - https://lore.kernel.org/linux-arm-kernel/02e7047071f0b54b046ac472adeeb3fafabc643c.camel@redhat.com/ Fixes: 01bb86b380a3 ("driver core: Add fwnode_init()") Reported-by: Qian Cai Suggested-by: Robin Murphy Tested-by: Marc Zyngier Acked-by: Rafael J. Wysocki Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201211202629.2164655-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 39263c6b52e1..2630c2e953f7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -55,7 +55,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) if (!fwnode) return NULL; - fwnode->ops = &acpi_static_fwnode_ops; + fwnode_init(fwnode, &acpi_static_fwnode_ops); return fwnode; } -- cgit v1.2.3 From 98b89b649fce39dacb9dc036d6d0fdb8caff73f7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:03:01 -0600 Subject: signal: kill JOBCTL_TASK_WORK It's no longer used, get rid of it. Signed-off-by: Jens Axboe --- include/linux/sched/jobctl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index d2b4204ba4d3..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,7 +19,6 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ -#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -29,10 +28,9 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) -#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); -- cgit v1.2.3 From e296dc4996b8094ccde45d19090d804c4103513e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:04:39 -0600 Subject: kernel: remove checking for TIF_NOTIFY_SIGNAL It's available everywhere now, no need to check or add dummy defines. Signed-off-by: Jens Axboe --- include/linux/entry-common.h | 4 ---- include/linux/sched/signal.h | 2 -- include/linux/tracehook.h | 4 ---- 3 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b9711e813ec2..abec3a5ae799 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,10 +37,6 @@ # define _TIF_UPROBE (0) #endif -#ifndef _TIF_NOTIFY_SIGNAL -# define _TIF_NOTIFY_SIGNAL (0) -#endif - /* * TIF flags handled in syscall_enter_from_user_mode() */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index bd5afa076189..24b7b862e043 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -360,7 +360,6 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { -#if defined(TIF_NOTIFY_SIGNAL) /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops @@ -368,7 +367,6 @@ static inline int signal_pending(struct task_struct *p) */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; -#endif return task_sigpending(p); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index f7d82e4fafd6..ee9ab7dbc8c3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -205,12 +205,10 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { -#if defined(TIF_NOTIFY_SIGNAL) clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); if (current->task_works) task_work_run(); -#endif } /* @@ -218,11 +216,9 @@ static inline void tracehook_notify_signal(void) */ static inline void set_notify_signal(struct task_struct *task) { -#if defined(TIF_NOTIFY_SIGNAL) if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE)) kick_process(task); -#endif } #endif /* */ -- cgit v1.2.3 From 3cabca87b329cbcbdf295be0094adbd72c7b1f67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 12 Dec 2020 18:29:20 +0100 Subject: ntp: Fix prototype in the !CONFIG_GENERIC_CMOS_UPDATE case In the !CONFIG_GENERIC_CMOS_UPDATE case the update_persistent_clock64() function gets defined as a stub in ntp.c - make the prototype in conditional on CONFIG_GENERIC_CMOS_UPDATE as well. Fixes: 76e87d96b30b5 ("ntp: Consolidate the RTC update implementation") Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner --- include/linux/timekeeping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7f7e4a3f4394..929d3f3937c0 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -303,6 +303,8 @@ extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, struct timespec64 *boot_offset); +#ifdef CONFIG_GENERIC_CMOS_UPDATE extern int update_persistent_clock64(struct timespec64 now); +#endif #endif -- cgit v1.2.3 From 9a20f6f4e6ba9713605fbf7e7426ca22f1181545 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Dec 2020 15:16:46 -0500 Subject: SUNRPC: Fixes for xdr_align_data() The main use case right now for xdr_align_data() is to shift the page data to the left, and in practice shrink the total XDR data buffer. This patch ensures that we fix up the accounting for the buffer length as we shift that data around. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9548d075e06d..2b4e44bb0654 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -252,7 +252,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); -extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); +extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** -- cgit v1.2.3 From c4f2f591f02c392ea7de018d2733748bf4c7b5f5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Dec 2020 17:15:09 -0500 Subject: SUNRPC: Fix xdr_expand_hole() We do want to try to grow the buffer if possible, but if that attempt fails, we still want to move the data and truncate the XDR message. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2b4e44bb0654..178f499e2283 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -253,7 +253,7 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); -extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); /** * xdr_stream_remaining - Return the number of bytes remaining in the stream -- cgit v1.2.3 From f8d0e60f1056687826abc1eded98f0ea067dfc4c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 22:56:18 -0500 Subject: SUNRPC: Cleanup - constify a number of xdr_buf helpers There are a number of xdr helpers for struct xdr_buf that do not change the structure itself. Mark those as taking const pointers for documentation purposes. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 178f499e2283..68d49fdc4ee9 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -128,8 +128,8 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); -void xdr_terminate_string(struct xdr_buf *, const u32); -size_t xdr_buf_pagecount(struct xdr_buf *buf); +void xdr_terminate_string(const struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); @@ -182,14 +182,14 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) * XDR buffer helper functions */ extern void xdr_shift_buf(struct xdr_buf *, size_t); -extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); +extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); +extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); +extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); -extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); -extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); +extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32); +extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *); struct xdr_array2_desc; typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); @@ -200,9 +200,9 @@ struct xdr_array2_desc { xdr_xcode_elem_t xcode; }; -extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); -extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len); @@ -251,7 +251,7 @@ extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buf extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); -extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); -- cgit v1.2.3 From 7c03e2cda4a584cadc398e8f6641ca9988a39d52 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 14 Dec 2020 15:26:13 +0100 Subject: vfs: move cap_convert_nscap() call into vfs_setxattr() cap_convert_nscap() does permission checking as well as conversion of the xattr value conditionally based on fs's user-ns. This is needed by overlayfs and probably other layered fs (ecryptfs) and is what vfs_foo() is supposed to do anyway. Signed-off-by: Miklos Szeredi Acked-by: James Morris --- include/linux/capability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 1e7fe311cabe..b2f698915c0f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -270,6 +270,6 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); -extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); +extern int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size); #endif /* !_LINUX_CAPABILITY_H */ -- cgit v1.2.3 From e0a6aa30504cb8179d07609fb6386705e8f00663 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 13 Dec 2020 09:39:40 +0100 Subject: efi: ia64: disable the capsule loader EFI capsule loading is a feature that was introduced into EFI long after its initial introduction on Itanium, and it is highly unlikely that IA64 systems are receiving firmware updates in the first place, let alone using EFI capsules. So let's disable capsule support altogether on IA64. This fixes a build error on IA64 due to a recent change that added an unconditional include of asm/efi.h, which IA64 does not provide. While at it, tweak the make rules a bit so that the EFI capsule component that is always builtin (even if the EFI capsule loader itself is built as a module) is omitted for all architectures if the module is not enabled in the build. Cc: Tony Luck Link: https://lore.kernel.org/linux-efi/20201214152200.38353-1-ardb@kernel.org Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1cd5d91d8ca1..763b816ba19c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -817,12 +817,6 @@ static inline bool efi_enabled(int feature) static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} -static inline bool -efi_capsule_pending(int *reset_type) -{ - return false; -} - static inline bool efi_soft_reserve_enabled(void) { return false; @@ -1038,6 +1032,7 @@ bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); +#if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); extern int efi_capsule_supported(efi_guid_t guid, u32 flags, @@ -1045,6 +1040,9 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags, extern int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages); +#else +static inline bool efi_capsule_pending(int *reset_type) { return false; } +#endif #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -- cgit v1.2.3 From 50c9132ddfb2024e96900407beeec660cf9848bd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Sep 2020 07:55:39 -0400 Subject: ceph: add new RECOVER mount_state when recovering session When recovering a session (a'la recover_session=clean), we want to do all of the operations that we do on a forced umount, but changing the mount state to SHUTDOWN is can cause queued MDS requests to fail when the session comes back. Most of those can idle until the session is recovered in this situation. Reserve SHUTDOWN state for forced umount, and make a new RECOVER state for the forced reconnect situation. Change several tests for equality with SHUTDOWN to test for that or RECOVER. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c8645f0b797d..eb5a7ca13f9c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -104,6 +104,7 @@ enum { CEPH_MOUNT_UNMOUNTING, CEPH_MOUNT_UNMOUNTED, CEPH_MOUNT_SHUTDOWN, + CEPH_MOUNT_RECOVER, }; static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) -- cgit v1.2.3 From 36c9478d6069994848c8897755b4380aa0a29dd3 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Tue, 10 Nov 2020 21:20:08 +0800 Subject: libceph: remove unused port macros 1. monitor's default port is defined by CEPH_MON_PORT 2. CEPH_PORT_START and CEPH_PORT_LAST are not needed. Signed-off-by: Changcheng Liu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 9e50aede46c8..46939485f2c3 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -8,15 +8,6 @@ #define CEPH_MON_PORT 6789 /* default monitor port */ -/* - * client-side processes will try to bind to ports in this - * range, simply for the benefit of tools like nmap or wireshark - * that would like to identify the protocol. - */ -#define CEPH_PORT_FIRST 6789 -#define CEPH_PORT_START 6800 /* non-monitors start here */ -#define CEPH_PORT_LAST 6900 - /* * tcp connection banner. include a protocol version. and adjust * whenever the wire protocol changes. try to keep this string length -- cgit v1.2.3 From 968cd14edc3acff251f98bdc1eb15f13f05dd5fb Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 9 Dec 2020 10:52:20 +0800 Subject: ceph: set osdmap epoch for setxattr When setting the file/dir layout, it may need data pool info. So in mds server, it needs to check the osdmap. At present, if mds doesn't find the data pool specified, it will try to get the latest osdmap. Now if pass the osd epoch for setxattr, the mds server can only check this epoch of osdmap. URL: https://tracker.ceph.com/issues/48504 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 455e9b9e2adf..c0f1b921ec69 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -424,6 +424,7 @@ union ceph_mds_request_args { } __attribute__ ((packed)) open; struct { __le32 flags; + __le32 osdmap_epoch; /* used for setting file/dir layouts */ } __attribute__ ((packed)) setxattr; struct { struct ceph_file_layout_legacy layout; -- cgit v1.2.3 From 4f1ddb1ea874c7703528a8c21b77b7f2462ee247 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 Dec 2020 10:12:59 -0500 Subject: ceph: implement updated ceph_mds_request_head structure When we added the btime feature in mainline ceph, we had to extend struct ceph_mds_request_args so that it could be set. Implement the same in the kernel client. Rename ceph_mds_request_head with a _old extension, and a union ceph_mds_request_args_ext to allow for the extended size of the new header format. Add the appropriate code to handle both formats in struct create_request_message and key the behavior on whether the peer supports CEPH_FEATURE_FS_BTIME. The gid_list field in the payload is now populated from the saved credential. For now, we don't add any support for setting the btime via setattr, but this does enable us to add that in the future. [ idryomov: break unnecessarily long lines ] Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c0f1b921ec69..d44d98033d58 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -446,11 +446,25 @@ union ceph_mds_request_args { } __attribute__ ((packed)) lookupino; } __attribute__ ((packed)); +union ceph_mds_request_args_ext { + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; +}; + #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ -struct ceph_mds_request_head { +struct ceph_mds_request_head_old { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -463,6 +477,22 @@ struct ceph_mds_request_head { union ceph_mds_request_args args; } __attribute__ ((packed)); +#define CEPH_MDS_REQUEST_HEAD_VERSION 1 + +struct ceph_mds_request_head { + __le16 version; /* struct version */ + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* count retry, fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args_ext args; +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ -- cgit v1.2.3 From 418af5b3bfc4f1ef4854e83c5be8a0bdce51e95c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 29 Oct 2020 14:49:10 +0100 Subject: libceph: lower exponential backoff delay The current setting allows the backoff to climb up to 5 minutes. This is too high -- it becomes hard to tell whether the client is stuck on something or just in backoff. In userspace, ms_max_backoff is defaulted to 15 seconds. Let's do the same. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 60b324efd1c4..b47c7cc4c90a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -241,8 +241,8 @@ struct ceph_msg { }; /* ceph connection fault delay defaults, for exponential backoff */ -#define BASE_DELAY_INTERVAL (HZ/2) -#define MAX_DELAY_INTERVAL (5 * 60 * HZ) +#define BASE_DELAY_INTERVAL (HZ / 4) +#define MAX_DELAY_INTERVAL (15 * HZ) /* * A single connection with another host. -- cgit v1.2.3 From 5cd8da3a1ca2160b8f9c2ff6a96762e66410ea38 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 13 Oct 2020 17:23:22 +0200 Subject: libceph: drop msg->ack_stamp field It is set in process_ack() but never used. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b47c7cc4c90a..6f77e70db855 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -235,7 +235,6 @@ struct ceph_msg { bool more_to_follow; bool needs_out_seq; int front_alloc_len; - unsigned long ack_stamp; /* tx: when we were acked */ struct ceph_msgpool *pool; }; -- cgit v1.2.3 From 30be780a87211de75b93935c20a0913e46744a3f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:11:26 +0100 Subject: libceph: make con->state an int unsigned long is a leftover from when con->state used to be a set of bits managed with set_bit(), clear_bit(), etc. Save a bit of memory. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 6f77e70db855..f053de4f46dd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -257,13 +257,13 @@ struct ceph_connection { struct ceph_messenger *msgr; + int state; atomic_t sock_state; struct socket *sock; struct ceph_entity_addr peer_addr; /* peer address */ struct ceph_entity_addr peer_addr_for_me; unsigned long flags; - unsigned long state; const char *error_msg; /* error message, if any */ struct ceph_entity_name peer_name; /* peer name */ -- cgit v1.2.3 From 6d7f62bfb5b5da6b0b37174c1fd32545f3b5b90d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:59:02 +0100 Subject: libceph: rename and export con->state states In preparation for msgr2, rename msgr1 specific states and move the defines to the header file. Also drop state transition comments. They don't cover all possible transitions (e.g. NEGOTIATING -> STANDBY, etc) and currently do more harm than good. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index f053de4f46dd..e6be85b67c6c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -239,6 +239,16 @@ struct ceph_msg { struct ceph_msgpool *pool; }; +/* + * connection states + */ +#define CEPH_CON_S_CLOSED 1 +#define CEPH_CON_S_PREOPEN 2 +#define CEPH_CON_S_V1_BANNER 3 +#define CEPH_CON_S_V1_CONNECT_MSG 4 +#define CEPH_CON_S_OPEN 5 +#define CEPH_CON_S_STANDBY 6 + /* ceph connection fault delay defaults, for exponential backoff */ #define BASE_DELAY_INTERVAL (HZ / 4) #define MAX_DELAY_INTERVAL (15 * HZ) @@ -257,7 +267,7 @@ struct ceph_connection { struct ceph_messenger *msgr; - int state; + int state; /* CEPH_CON_S_* */ atomic_t sock_state; struct socket *sock; struct ceph_entity_addr peer_addr; /* peer address */ -- cgit v1.2.3 From 3fefd43e741a5b8d55aeb9115ff488ad2cad439b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:56:36 +0100 Subject: libceph: rename and export con->flags bits In preparation for msgr2, move the defines to the header file. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e6be85b67c6c..b6962a0fd76f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -249,6 +249,17 @@ struct ceph_msg { #define CEPH_CON_S_OPEN 5 #define CEPH_CON_S_STANDBY 6 +/* + * ceph_connection flag bits + */ +#define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop + messages on errors */ +#define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */ +#define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */ +#define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */ +#define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed + work */ + /* ceph connection fault delay defaults, for exponential backoff */ #define BASE_DELAY_INTERVAL (HZ / 4) #define MAX_DELAY_INTERVAL (15 * HZ) @@ -273,7 +284,7 @@ struct ceph_connection { struct ceph_entity_addr peer_addr; /* peer address */ struct ceph_entity_addr peer_addr_for_me; - unsigned long flags; + unsigned long flags; /* CEPH_CON_F_* */ const char *error_msg; /* error message, if any */ struct ceph_entity_name peer_name; /* peer name */ -- cgit v1.2.3 From 699921d9e68ff3d9f8645488c12f4689c6533d70 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:37:06 +0100 Subject: libceph: export zero_page In preparation for msgr2, make zero_page global. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b6962a0fd76f..513ed5f90bff 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -343,6 +343,7 @@ struct ceph_connection { unsigned long delay; /* current delay interval */ }; +extern struct page *ceph_zero_page; extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); -- cgit v1.2.3 From 6503e0b69c9d4d78b5450db01e79328f8ed4ef21 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 16:29:47 +0100 Subject: libceph: export remaining protocol independent infrastructure In preparation for msgr2, make all protocol independent functions in messenger.c global. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 513ed5f90bff..93815f1a42b5 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -345,13 +345,50 @@ struct ceph_connection { extern struct page *ceph_zero_page; +void ceph_con_flag_clear(struct ceph_connection *con, unsigned long con_flag); +void ceph_con_flag_set(struct ceph_connection *con, unsigned long con_flag); +bool ceph_con_flag_test(struct ceph_connection *con, unsigned long con_flag); +bool ceph_con_flag_test_and_clear(struct ceph_connection *con, + unsigned long con_flag); +bool ceph_con_flag_test_and_set(struct ceph_connection *con, + unsigned long con_flag); + +void ceph_encode_my_addr(struct ceph_messenger *msgr); + +int ceph_tcp_connect(struct ceph_connection *con); +int ceph_con_close_socket(struct ceph_connection *con); +void ceph_con_reset_session(struct ceph_connection *con); + +u32 ceph_get_global_seq(struct ceph_messenger *msgr, u32 gt); +void ceph_con_discard_sent(struct ceph_connection *con, u64 ack_seq); +void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq); + +void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, + struct ceph_msg *msg, size_t length); +struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length, + bool *last_piece); +void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes); + +u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset, + unsigned int length); + +bool ceph_addr_is_blank(const struct ceph_entity_addr *addr); +int ceph_addr_port(const struct ceph_entity_addr *addr); +void ceph_addr_set_port(struct ceph_entity_addr *addr, int p); + +void ceph_con_process_message(struct ceph_connection *con); +int ceph_con_in_msg_alloc(struct ceph_connection *con, + struct ceph_msg_header *hdr, int *skip); +void ceph_con_get_out_msg(struct ceph_connection *con); + + extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); extern int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, int max_count, int *count); - extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); -- cgit v1.2.3 From 566050e17e53db283d4e26b73b4b50556f97ce7b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 12 Nov 2020 12:55:39 +0100 Subject: libceph: separate msgr1 protocol implementation In preparation for msgr2, define internal messenger <-> protocol interface (as opposed to external messenger <-> client interface, which is struct ceph_connection_operations) consisting of try_read(), try_write(), revoke(), revoke_incoming(), opened(), reset_session() and reset_protocol() ops. The semantics are exactly the same as they are now. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 93815f1a42b5..8cc8b08eb3dd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -382,6 +382,14 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); void ceph_con_get_out_msg(struct ceph_connection *con); +int ceph_con_v1_try_read(struct ceph_connection *con); +int ceph_con_v1_try_write(struct ceph_connection *con); +void ceph_con_v1_revoke(struct ceph_connection *con); +void ceph_con_v1_revoke_incoming(struct ceph_connection *con); +bool ceph_con_v1_opened(struct ceph_connection *con); +void ceph_con_v1_reset_session(struct ceph_connection *con); +void ceph_con_v1_reset_protocol(struct ceph_connection *con); + extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); -- cgit v1.2.3 From 2f713615ddd9d805b6c5e79c52e0e11af99d2bf1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 12 Nov 2020 15:48:06 +0100 Subject: libceph: move msgr1 protocol implementation to its own file A pure move, no other changes. Note that ceph_tcp_recv{msg,page}() and ceph_tcp_send{msg,page}() helpers are also moved. msgr2 will bring its own, more efficient, variants based on iov_iter. Switching msgr1 to them was considered but decided against to avoid subtle regressions. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 8cc8b08eb3dd..b5268127c55e 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -382,6 +382,7 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); void ceph_con_get_out_msg(struct ceph_connection *con); +/* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con); void ceph_con_v1_revoke(struct ceph_connection *con); -- cgit v1.2.3 From a56dd9bf47220c3206f27075af8bdfb219a2a3cf Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 12 Nov 2020 16:31:41 +0100 Subject: libceph: move msgr1 protocol specific fields to its own struct A couple whitespace fixups, no functional changes. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 76 +++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b5268127c55e..54a64e8dfce6 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -264,6 +264,43 @@ struct ceph_msg { #define BASE_DELAY_INTERVAL (HZ / 4) #define MAX_DELAY_INTERVAL (15 * HZ) +struct ceph_connection_v1_info { + struct kvec out_kvec[8], /* sending header/footer data */ + *out_kvec_cur; + int out_kvec_left; /* kvec's left in out_kvec */ + int out_skip; /* skip this many bytes */ + int out_kvec_bytes; /* total bytes left */ + bool out_more; /* there is more data after the kvecs */ + bool out_msg_done; + + struct ceph_auth_handshake *auth; + int auth_retry; /* true if we need a newer authorizer */ + + /* connection negotiation temps */ + u8 in_banner[CEPH_BANNER_MAX_LEN]; + struct ceph_entity_addr actual_peer_addr; + struct ceph_entity_addr peer_addr_for_me; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; + + int in_base_pos; /* bytes read */ + + /* message in temps */ + u8 in_tag; /* protocol control byte */ + struct ceph_msg_header in_hdr; + __le64 in_temp_ack; /* for reading an ack */ + + /* message out temps */ + struct ceph_msg_header out_hdr; + __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ + + u32 connect_seq; /* identify the most recent connection + attempt for this session */ + u32 peer_global_seq; /* peer's global seq for this connection */ +}; + /* * A single connection with another host. * @@ -281,21 +318,13 @@ struct ceph_connection { int state; /* CEPH_CON_S_* */ atomic_t sock_state; struct socket *sock; - struct ceph_entity_addr peer_addr; /* peer address */ - struct ceph_entity_addr peer_addr_for_me; unsigned long flags; /* CEPH_CON_F_* */ const char *error_msg; /* error message, if any */ struct ceph_entity_name peer_name; /* peer name */ - + struct ceph_entity_addr peer_addr; /* peer address */ u64 peer_features; - u32 connect_seq; /* identify the most recent connection - attempt for this connection, client */ - u32 peer_global_seq; /* peer's global seq for this connection */ - - struct ceph_auth_handshake *auth; - int auth_retry; /* true if we need a newer authorizer */ struct mutex mutex; @@ -306,41 +335,18 @@ struct ceph_connection { u64 in_seq, in_seq_acked; /* last message received, acked */ - /* connection negotiation temps */ - char in_banner[CEPH_BANNER_MAX_LEN]; - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - struct ceph_entity_addr actual_peer_addr; - - /* message out temps */ - struct ceph_msg_header out_hdr; + struct ceph_msg *in_msg; struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ - bool out_msg_done; - - struct kvec out_kvec[8], /* sending header/footer data */ - *out_kvec_cur; - int out_kvec_left; /* kvec's left in out_kvec */ - int out_skip; /* skip this many bytes */ - int out_kvec_bytes; /* total bytes left */ - int out_more; /* there is more data after the kvecs */ - __le64 out_temp_ack; /* for writing an ack */ - struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 - stamp */ - /* message in temps */ - struct ceph_msg_header in_hdr; - struct ceph_msg *in_msg; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ - char in_tag; /* protocol control byte */ - int in_base_pos; /* bytes read */ - __le64 in_temp_ack; /* for reading an ack */ - struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ + + struct ceph_connection_v1_info v1; }; extern struct page *ceph_zero_page; -- cgit v1.2.3 From 285ea34fc876aa0a2c5e65d310c4a41269e2e5f2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2020 16:47:20 +0100 Subject: libceph, ceph: incorporate nautilus cephx changes - request service tickets together with auth ticket. Currently we get auth ticket via CEPHX_GET_AUTH_SESSION_KEY op and then request service tickets via CEPHX_GET_PRINCIPAL_SESSION_KEY op in a separate message. Since nautilus, desired service tickets are shared togther with auth ticket in CEPHX_GET_AUTH_SESSION_KEY reply. - propagate session key and connection secret, if any. In preparation for msgr2, update handle_reply() and verify_authorizer_reply() auth ops to propagate session key and connection secret. Since nautilus, if secure mode is negotiated, connection secret is shared either in CEPHX_GET_AUTH_SESSION_KEY reply (for mons) or in a final authorizer reply (for osds and mdses). Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 6728c2ee0205..d9e7d0bcdaf1 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -53,7 +53,9 @@ struct ceph_auth_client_ops { */ int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); int (*handle_reply)(struct ceph_auth_client *ac, int result, - void *buf, void *end); + void *buf, void *end, u8 *session_key, + int *session_key_len, u8 *con_secret, + int *con_secret_len); /* * Create authorizer for connecting to a service, and verify @@ -69,7 +71,10 @@ struct ceph_auth_client_ops { void *challenge_buf, int challenge_buf_len); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, - struct ceph_authorizer *a); + struct ceph_authorizer *a, + void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); void (*invalidate_authorizer)(struct ceph_auth_client *ac, int peer_type); @@ -126,8 +131,11 @@ int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, struct ceph_authorizer *a, void *challenge_buf, int challenge_buf_len); -extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, - struct ceph_authorizer *a); +int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type); -- cgit v1.2.3 From 59711f9ec219bf5245a8e95989803fb503adc52d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2020 17:01:53 +0100 Subject: libceph: amend cephx init_protocol() and build_request() In msgr2, initial authentication happens with an exchange of msgr2 control frames -- MAuth message and struct ceph_mon_request_header aren't used. Make that optional. Stop reporting cephx protocol as "x". Use "cephx" instead. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d44d98033d58..6d986e52000b 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -95,6 +95,7 @@ struct ceph_dir_layout { #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) +const char *ceph_auth_proto_name(int proto); /********************************************* * message layer -- cgit v1.2.3 From c1c0ce78f479cf4d7dfe72c4c1cabbf0bc0730c9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2020 17:05:44 +0100 Subject: libceph: drop ac->ops->name field Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index d9e7d0bcdaf1..5f64f66309fa 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -32,8 +32,6 @@ struct ceph_auth_handshake { }; struct ceph_auth_client_ops { - const char *name; - /* * true if we are authenticated and can connect to * services. -- cgit v1.2.3 From a5cbd5fc22d5043a8a76e15d75d031fe24d1f69c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 30 Oct 2020 13:30:51 +0100 Subject: libceph, ceph: get and handle cluster maps with addrvecs In preparation for msgr2, make the cluster send us maps with addrvecs including both LEGACY and MSGR2 addrs instead of a single LEGACY addr. This means advertising support for SERVER_NAUTILUS and also some older features: SERVER_MIMIC, MONENC and MONNAMES. MONNAMES and MONENC are actually pre-argonaut, we just never updated ceph_monmap_decode() for them. Decoding is unconditional, see commit 23c625ce3065 ("libceph: assume argonaut on the server side"). SERVER_MIMIC doesn't bear any meaning for the kernel client. Since ceph_decode_entity_addrvec() is guarded by encoding version checks (and in msgr2 case it is guarded implicitly by the fact that server is speaking msgr2), we assume MSG_ADDR2 for it. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 11 ++++++++--- include/linux/ceph/decode.h | 4 ++++ include/linux/ceph/mdsmap.h | 2 +- include/linux/ceph/osdmap.h | 4 ++-- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 999636d53cf2..3a47acd9cc14 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -8,7 +8,8 @@ * feature. Base case is 1 (first use). */ #define CEPH_FEATURE_INCARNATION_1 (0ull) -#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL +#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL +#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL< Date: Wed, 25 Nov 2020 14:41:59 +0100 Subject: libceph, rbd: ignore addr->type while comparing in some cases For libceph, this ensures that libceph instance sharing (share option) continues to work. For rbd, this avoids blocklisting alive lock owners (locker addr is always LEGACY, while watcher addr is ANY in nautilus). Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 46939485f2c3..9a897a60f20b 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -52,11 +52,18 @@ extern const char *ceph_entity_type_name(int type); * entity_addr -- network address */ struct ceph_entity_addr { - __le32 type; + __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */ __le32 nonce; /* unique id for process (e.g. pid) */ struct sockaddr_storage in_addr; } __attribute__ ((packed)); +static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs, + const struct ceph_entity_addr *rhs) +{ + return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) && + lhs->nonce == rhs->nonce; +} + struct ceph_entity_inst { struct ceph_entity_name name; struct ceph_entity_addr addr; -- cgit v1.2.3 From 00498b994113a871a556f7ff24a4cf8a00611700 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Nov 2020 16:04:58 +0100 Subject: libceph: introduce connection modes and ms_mode option msgr2 supports two connection modes: crc (plain) and secure (on-wire encryption). Connection mode is picked by server based on input from client. Introduce ms_mode option: ms_mode=legacy - msgr1 (default) ms_mode=crc - crc mode, if denied fail ms_mode=secure - secure mode, if denied fail ms_mode=prefer-crc - crc mode, if denied agree to secure mode ms_mode=prefer-secure - secure mode, if denied agree to crc mode ms_mode affects all connections, we don't separate connections to mons like it's done in userspace with ms_client_mode vs ms_mon_client_mode. For now the default is legacy, to be flipped to prefer-crc after some time. Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 8 ++++++-- include/linux/ceph/ceph_fs.h | 6 ++++++ include/linux/ceph/libceph.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 5f64f66309fa..6fc058fe9efa 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -98,11 +98,15 @@ struct ceph_auth_client { const struct ceph_crypto_key *key; /* our secret key */ unsigned want_keys; /* which services we want */ + int preferred_mode; /* CEPH_CON_MODE_* */ + int fallback_mode; /* ditto */ + struct mutex mutex; }; -extern struct ceph_auth_client *ceph_auth_init(const char *name, - const struct ceph_crypto_key *key); +struct ceph_auth_client *ceph_auth_init(const char *name, + const struct ceph_crypto_key *key, + const int *con_modes); extern void ceph_auth_destroy(struct ceph_auth_client *ac); extern void ceph_auth_reset(struct ceph_auth_client *ac); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 6d986e52000b..ce22d5469670 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -93,9 +93,15 @@ struct ceph_dir_layout { #define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_CEPHX 0x2 +/* msgr2 protocol modes */ +#define CEPH_CON_MODE_UNKNOWN 0x0 +#define CEPH_CON_MODE_CRC 0x1 +#define CEPH_CON_MODE_SECURE 0x2 + #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) const char *ceph_auth_proto_name(int proto); +const char *ceph_con_mode_name(int mode); /********************************************* * message layer diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index eb5a7ca13f9c..8765a5ad267a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -53,6 +53,7 @@ struct ceph_options { unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */ u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ + int con_modes[2]; /* CEPH_CON_MODE_* */ /* * any type that can't be simply compared or doesn't need -- cgit v1.2.3 From cd1a677cad994021b19665ed476aea63f5d54f31 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Nov 2020 16:59:08 +0100 Subject: libceph, ceph: implement msgr2.1 protocol (crc and secure modes) Implement msgr2.1 wire protocol, available since nautilus 14.2.11 and octopus 15.2.5. msgr2.0 wire protocol is not implemented -- it has several security, integrity and robustness issues and therefore considered deprecated. Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 36 ++++++++++- include/linux/ceph/ceph_fs.h | 4 ++ include/linux/ceph/decode.h | 4 ++ include/linux/ceph/libceph.h | 9 ++- include/linux/ceph/messenger.h | 136 ++++++++++++++++++++++++++++++++++++++++- include/linux/ceph/msgr.h | 48 +++++++++++++++ 6 files changed, 231 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 6fc058fe9efa..3fbe72ebd779 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -120,8 +120,12 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end); extern int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len); - extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); + +int __ceph_auth_get_authorizer(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + int peer_type, bool force_new, + int *proto, int *pref_mode, int *fallb_mode); extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); @@ -157,4 +161,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth, return auth->check_message_signature(auth, msg); return 0; } + +int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len); +int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply, + int reply_len, void *buf, int buf_len); +int ceph_auth_handle_reply_done(struct ceph_auth_client *ac, + u64 global_id, void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); +bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac, + int used_proto, int result, + const int *allowed_protos, int proto_cnt, + const int *allowed_modes, int mode_cnt); + +int ceph_auth_get_authorizer(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + int peer_type, void *buf, int *buf_len); +int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + void *reply, int reply_len, + void *buf, int *buf_len); +int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); +bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac, + int peer_type, int used_proto, int result, + const int *allowed_protos, int proto_cnt, + const int *allowed_modes, int mode_cnt); + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ce22d5469670..e41a811026f6 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -93,6 +93,10 @@ struct ceph_dir_layout { #define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_CEPHX 0x2 +#define CEPH_AUTH_MODE_NONE 0 +#define CEPH_AUTH_MODE_AUTHORIZER 1 +#define CEPH_AUTH_MODE_MON 10 + /* msgr2 protocol modes */ #define CEPH_CON_MODE_UNKNOWN 0x0 #define CEPH_CON_MODE_CRC 0x1 diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 9a934e04f841..04f3ace5787b 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -221,6 +221,7 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv, #define CEPH_ENTITY_ADDR_TYPE_NONE 0 #define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1) #define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2) +#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3) static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a) { @@ -243,6 +244,9 @@ extern int ceph_decode_entity_addr(void **p, void *end, int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2, struct ceph_entity_addr *addr); +int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr); +void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr); + /* * encoders */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 8765a5ad267a..eb9008bb3992 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -31,10 +31,10 @@ #define CEPH_OPT_FSID (1<<0) #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ -#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ +#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */ #define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ -#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ +#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */ #define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) @@ -84,6 +84,7 @@ struct ceph_options { #define CEPH_MONC_HUNT_BACKOFF 2 #define CEPH_MONC_HUNT_MAX_MULT 10 +#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -152,6 +153,10 @@ struct ceph_client { #define from_msgr(ms) container_of(ms, struct ceph_client, msgr) +static inline bool ceph_msgr2(struct ceph_client *client) +{ + return client->options->con_modes[0] != CEPH_CON_MODE_UNKNOWN; +} /* * snapshots diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 54a64e8dfce6..0e6e9ad3c3bf 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -3,6 +3,7 @@ #define __FS_CEPH_MESSENGER_H #include +#include #include #include #include @@ -52,6 +53,23 @@ struct ceph_connection_operations { int (*sign_message) (struct ceph_msg *msg); int (*check_message_signature) (struct ceph_msg *msg); + + /* msgr2 authentication exchange */ + int (*get_auth_request)(struct ceph_connection *con, + void *buf, int *buf_len, + void **authorizer, int *authorizer_len); + int (*handle_auth_reply_more)(struct ceph_connection *con, + void *reply, int reply_len, + void *buf, int *buf_len, + void **authorizer, int *authorizer_len); + int (*handle_auth_done)(struct ceph_connection *con, + u64 global_id, void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); + int (*handle_auth_bad_method)(struct ceph_connection *con, + int used_proto, int result, + const int *allowed_protos, int proto_cnt, + const int *allowed_modes, int mode_cnt); }; /* use format string %s%lld */ @@ -246,8 +264,15 @@ struct ceph_msg { #define CEPH_CON_S_PREOPEN 2 #define CEPH_CON_S_V1_BANNER 3 #define CEPH_CON_S_V1_CONNECT_MSG 4 -#define CEPH_CON_S_OPEN 5 -#define CEPH_CON_S_STANDBY 6 +#define CEPH_CON_S_V2_BANNER_PREFIX 5 +#define CEPH_CON_S_V2_BANNER_PAYLOAD 6 +#define CEPH_CON_S_V2_HELLO 7 +#define CEPH_CON_S_V2_AUTH 8 +#define CEPH_CON_S_V2_AUTH_SIGNATURE 9 +#define CEPH_CON_S_V2_SESSION_CONNECT 10 +#define CEPH_CON_S_V2_SESSION_RECONNECT 11 +#define CEPH_CON_S_OPEN 12 +#define CEPH_CON_S_STANDBY 13 /* * ceph_connection flag bits @@ -301,6 +326,99 @@ struct ceph_connection_v1_info { u32 peer_global_seq; /* peer's global seq for this connection */ }; +#define CEPH_CRC_LEN 4 +#define CEPH_GCM_KEY_LEN 16 +#define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce) +#define CEPH_GCM_BLOCK_LEN 16 +#define CEPH_GCM_TAG_LEN 16 + +#define CEPH_PREAMBLE_LEN 32 +#define CEPH_PREAMBLE_INLINE_LEN 48 +#define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN +#define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \ + CEPH_PREAMBLE_INLINE_LEN + \ + CEPH_GCM_TAG_LEN) +#define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN) +#define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN) + +#define CEPH_FRAME_MAX_SEGMENT_COUNT 4 + +struct ceph_frame_desc { + int fd_tag; /* FRAME_TAG_* */ + int fd_seg_cnt; + int fd_lens[CEPH_FRAME_MAX_SEGMENT_COUNT]; /* logical */ + int fd_aligns[CEPH_FRAME_MAX_SEGMENT_COUNT]; +}; + +struct ceph_gcm_nonce { + __le32 fixed; + __le64 counter __packed; +}; + +struct ceph_connection_v2_info { + struct iov_iter in_iter; + struct kvec in_kvecs[5]; /* recvmsg */ + struct bio_vec in_bvec; /* recvmsg (in_cursor) */ + int in_kvec_cnt; + int in_state; /* IN_S_* */ + + struct iov_iter out_iter; + struct kvec out_kvecs[8]; /* sendmsg */ + struct bio_vec out_bvec; /* sendpage (out_cursor, out_zero), + sendmsg (out_enc_pages) */ + int out_kvec_cnt; + int out_state; /* OUT_S_* */ + + int out_zero; /* # of zero bytes to send */ + bool out_iter_sendpage; /* use sendpage if possible */ + + struct ceph_frame_desc in_desc; + struct ceph_msg_data_cursor in_cursor; + struct ceph_msg_data_cursor out_cursor; + + struct crypto_shash *hmac_tfm; /* post-auth signature */ + struct crypto_aead *gcm_tfm; /* on-wire encryption */ + struct aead_request *gcm_req; + struct crypto_wait gcm_wait; + struct ceph_gcm_nonce in_gcm_nonce; + struct ceph_gcm_nonce out_gcm_nonce; + + struct page **out_enc_pages; + int out_enc_page_cnt; + int out_enc_resid; + int out_enc_i; + + int con_mode; /* CEPH_CON_MODE_* */ + + void *conn_bufs[16]; + int conn_buf_cnt; + + struct kvec in_sign_kvecs[8]; + struct kvec out_sign_kvecs[8]; + int in_sign_kvec_cnt; + int out_sign_kvec_cnt; + + u64 client_cookie; + u64 server_cookie; + u64 global_seq; + u64 connect_seq; + u64 peer_global_seq; + + u8 in_buf[CEPH_PREAMBLE_SECURE_LEN]; + u8 out_buf[CEPH_PREAMBLE_SECURE_LEN]; + struct { + u8 late_status; /* FRAME_LATE_STATUS_* */ + union { + struct { + u32 front_crc; + u32 middle_crc; + u32 data_crc; + } __packed; + u8 pad[CEPH_GCM_BLOCK_LEN - 1]; + }; + } out_epil; +}; + /* * A single connection with another host. * @@ -346,7 +464,10 @@ struct ceph_connection { struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ - struct ceph_connection_v1_info v1; + union { + struct ceph_connection_v1_info v1; + struct ceph_connection_v2_info v2; + }; }; extern struct page *ceph_zero_page; @@ -397,6 +518,15 @@ bool ceph_con_v1_opened(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con); void ceph_con_v1_reset_protocol(struct ceph_connection *con); +/* messenger_v2.c */ +int ceph_con_v2_try_read(struct ceph_connection *con); +int ceph_con_v2_try_write(struct ceph_connection *con); +void ceph_con_v2_revoke(struct ceph_connection *con); +void ceph_con_v2_revoke_incoming(struct ceph_connection *con); +bool ceph_con_v2_opened(struct ceph_connection *con); +void ceph_con_v2_reset_session(struct ceph_connection *con); +void ceph_con_v2_reset_protocol(struct ceph_connection *con); + extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 9a897a60f20b..f5e02f6c0655 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -14,9 +14,39 @@ * constant. */ #define CEPH_BANNER "ceph v027" +#define CEPH_BANNER_LEN 9 #define CEPH_BANNER_MAX_LEN 30 +/* + * messenger V2 connection banner prefix. + * The full banner string should have the form: "ceph v2\n" + * the 2 bytes are the length of the remaining banner. + */ +#define CEPH_BANNER_V2 "ceph v2\n" +#define CEPH_BANNER_V2_LEN 8 +#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16)) + +/* + * messenger V2 features + */ +#define CEPH_MSGR2_INCARNATION_1 (0ull) + +#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ + static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ + static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \ + (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); + +#define HAVE_MSGR2_FEATURE(x, name) \ + (((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name)) + +DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1 + +#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) + +#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) + + /* * Rollover-safe type and comparator for 32-bit sequence numbers. * Comparator returns -1, 0, or 1. @@ -158,6 +188,24 @@ struct ceph_msg_header { __le32 crc; /* header crc32c */ } __attribute__ ((packed)); +struct ceph_msg_header2 { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 data_pre_padding_len; + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + __le64 ack_seq; + __u8 flags; + /* oldest code we think can decode this. unknown if zero. */ + __le16 compat_version; + __le16 reserved; +} __attribute__ ((packed)); + #define CEPH_MSG_PRIO_LOW 64 #define CEPH_MSG_PRIO_DEFAULT 127 #define CEPH_MSG_PRIO_HIGH 196 -- cgit v1.2.3 From 2f0df6cfa325d7106b8a65bc0e02db1086e3f73b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Nov 2020 20:00:10 +0100 Subject: libceph: drop ceph_auth_{create,update}_authorizer() Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 3fbe72ebd779..71b5d481c653 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -126,13 +126,7 @@ int __ceph_auth_get_authorizer(struct ceph_auth_client *ac, struct ceph_auth_handshake *auth, int peer_type, bool force_new, int *proto, int *pref_mode, int *fallb_mode); -extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, - int peer_type, - struct ceph_auth_handshake *auth); void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); -extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, - int peer_type, - struct ceph_auth_handshake *a); int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, struct ceph_authorizer *a, void *challenge_buf, -- cgit v1.2.3 From be98e05a67f05ff4c8349a51fcec993a28be718c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 4 Dec 2020 21:02:42 +0100 Subject: dma-buf: Fix kerneldoc formatting I wanted to look up something and noticed the hyperlink doesn't work. While fixing that also noticed a trivial kerneldoc comment typo in the same section, fix that too. Reviewed-by: Michael J. Ruhl Reviewed-by: Simon Ser Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201204200242.2671481-1-daniel.vetter@ffwll.ch --- include/linux/dma-buf-map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 583a3a1f9447..278d489e4bdd 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -122,7 +122,7 @@ struct dma_buf_map { /** * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory - * @vaddr: A system-memory address + * @vaddr_: A system-memory address */ #define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ { \ -- cgit v1.2.3 From a313357e704f2617f298333e3e617a38b1719760 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:37 +0100 Subject: genirq: Move irq_has_action() into core code This function uses irq_to_desc() and is going to be used by modules to replace the open coded irq_to_desc() (ab)usage. The final goal is to remove the export of irq_to_desc() so driver cannot fiddle with it anymore. Move it into the core code and fixup the usage sites to include the proper header. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.548936472@linutronix.de --- include/linux/interrupt.h | 1 + include/linux/irqdesc.h | 7 +------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 870b3251e174..bb8ff9083e7d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -232,6 +232,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); # define local_irq_enable_in_hardirq() local_irq_enable() #endif +bool irq_has_action(unsigned int irq); extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5745491303e0..385a4fafe631 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -179,12 +179,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { - return desc->action != NULL; -} - -static inline int irq_has_action(unsigned int irq) -{ - return irq_desc_has_action(irq_to_desc(irq)); + return desc && desc->action != NULL; } /** -- cgit v1.2.3 From fdd029630434b434b127efc7fba337da28f45658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:38 +0100 Subject: genirq: Move status flag checks to core These checks are used by modules and prevent the removal of the export of irq_to_desc(). Move the accessor into the core. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.703779349@linutronix.de --- include/linux/irqdesc.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 385a4fafe631..308d7db8991f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -223,28 +223,21 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, data->chip = chip; } +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); + static inline bool irq_balancing_disabled(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; + return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK); } static inline bool irq_is_percpu(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU; + return irq_check_status_bit(irq, IRQ_PER_CPU); } static inline bool irq_is_percpu_devid(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU_DEVID; + return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } static inline void -- cgit v1.2.3 From f1c6306c0d6b50844ba02c8a53e35405e9c0db05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:39 +0100 Subject: genirq: Move irq_set_lockdep_class() to core irq_set_lockdep_class() is used from modules and requires irq_to_desc() to be exported. Move it into the core code which lifts another requirement for the export. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.860029489@linutronix.de --- include/linux/irqdesc.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 308d7db8991f..4a1d016716f4 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -240,16 +240,14 @@ static inline bool irq_is_percpu_devid(unsigned int irq) return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class); static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { - struct irq_desc *desc = irq_to_desc(irq); - - if (desc) { - lockdep_set_class(&desc->lock, lock_class); - lockdep_set_class(&desc->request_mutex, request_class); - } + if (IS_ENABLED(CONFIG_LOCKDEP)) + __irq_set_lockdep_class(irq, lock_class, request_class); } #endif -- cgit v1.2.3 From 3e2380123fb96987ce958f623207010c667ffa7c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:40 +0100 Subject: genirq: Provide irq_get_effective_affinity() Provide an accessor to the effective interrupt affinity mask. Going to be used to replace open coded fiddling with the irq descriptor. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.967177918@linutronix.de --- include/linux/irq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c332871d59da..4aeb1c4c7e07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -906,6 +906,13 @@ struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) } #endif +static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + + return d ? irq_data_get_effective_affinity_mask(d) : NULL; +} + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, -- cgit v1.2.3 From 26c19d0a8610fb233b31730fe26a31145f2d9796 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:43 +0100 Subject: genirq: Make kstat_irqs() static No more users outside the core code. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.268774449@linutronix.de --- include/linux/kernel_stat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 89f0745c096d..44ae1a7eb9e3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,7 +67,6 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ -extern unsigned int kstat_irqs(unsigned int irq); extern unsigned int kstat_irqs_usr(unsigned int irq); /* -- cgit v1.2.3 From 501e2db67fa4264b517de5c7934e94cca89b3a1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:44 +0100 Subject: genirq: Provide kstat_irqdesc_cpu() Most users of kstat_irqs_cpu() have the irq descriptor already. No point in calling into the core code and looking it up once more. Use it in per_cpu_count_show() to start with. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.362094758@linutronix.de --- include/linux/irqdesc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 4a1d016716f4..891b323266df 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -113,6 +113,12 @@ static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif +static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, + unsigned int cpu) +{ + return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; +} + static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); -- cgit v1.2.3 From ee2cc4276ba4909438f5894a218877660e1536d9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:08:00 +0100 Subject: cpufreq: Add special-purpose fast-switching callback for drivers First off, some cpufreq drivers (eg. intel_pstate) can pass hints beyond the current target frequency to the hardware and there are no provisions for doing that in the cpufreq framework. In particular, today the driver has to assume that it should not allow the frequency to fall below the one requested by the governor (or the required capacity may not be provided) which may not be the case and which may lead to excessive energy usage in some scenarios. Second, the hints passed by these drivers to the hardware need not be in terms of the frequency, so representing the utilization numbers coming from the scheduler as frequency before passing them to those drivers is not really useful. Address the two points above by adding a special-purpose replacement for the ->fast_switch callback, called ->adjust_perf, allowing the governor to pass abstract performance level (rather than frequency) values for the minimum (required) and target (desired) performance along with the CPU capacity to compare them to. Also update the schedutil governor to use the new callback instead of ->fast_switch if present and if the utilization mertics are frequency-invariant (that is requisite for the direct mapping between the utilization and the CPU performance levels to be a reasonable approximation). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 14 ++++++++++++++ include/linux/sched/cpufreq.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 584fccd4fcab..9c8b7437b6cd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -320,6 +320,15 @@ struct cpufreq_driver { unsigned int index); unsigned int (*fast_switch)(struct cpufreq_policy *policy, unsigned int target_freq); + /* + * ->fast_switch() replacement for drivers that use an internal + * representation of performance levels and can pass hints other than + * the target performance level to the hardware. + */ + void (*adjust_perf)(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity); /* * Caches and returns the lowest driver-supported frequency greater than @@ -588,6 +597,11 @@ struct cpufreq_governor { /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); +void cpufreq_driver_adjust_perf(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity); +bool cpufreq_driver_has_adjust_perf(void); int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 3ed5aa18593f..6205578ab6ee 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -28,6 +28,11 @@ static inline unsigned long map_util_freq(unsigned long util, { return (freq + (freq >> 2)) * util / cap; } + +static inline unsigned long map_util_perf(unsigned long util) +{ + return util + (util >> 2); +} #endif /* CONFIG_CPU_FREQ */ #endif /* _LINUX_SCHED_CPUFREQ_H */ -- cgit v1.2.3 From f0dbd2bd1c22c6670e83ddcd46a9beb8b575e86d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:03:55 -0800 Subject: mm: slab: provide krealloc_array() When allocating an array of elements, users should check for multiplication overflow or preferably use one of the provided helpers like: kmalloc_array(). There's no krealloc_array() counterpart but there are many users who use regular krealloc() to reallocate arrays. Let's provide an actual krealloc_array() implementation. While at it: add some documentation regarding krealloc. Link: https://lkml.kernel.org/r/20201109110654.12547-3-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Vlastimil Babka Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian Knig Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S . Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index dd6897f62010..be4ba5867ac5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -592,6 +592,24 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) return __kmalloc(bytes, flags); } +/** + * krealloc_array - reallocate memory for an array. + * @p: pointer to the memory chunk to reallocate + * @new_n: new number of elements to alloc + * @new_size: new size of a single member of the array + * @flags: the type of memory to allocate (see kmalloc) + */ +static __must_check inline void * +krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, flags); +} + /** * kcalloc - allocate memory for an array. The memory is set to zero. * @n: number of elements. -- cgit v1.2.3 From 7fb7ab6d618a4dc7ea3f3eafc92388a35b4f8894 Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 14 Dec 2020 19:04:46 -0800 Subject: mm: fix page_owner initializing issue for arm32 Page owner of pages used by page owner itself used is missing on arm32 targets. The reason is dummy_handle and failure_handle is not initialized correctly. Buddy allocator is used to initialize these two handles. However, buddy allocator is not ready when page owner calls it. This change fixed that by initializing page owner after buddy initialization. The working flow before and after this change are: original logic: 1. allocated memory for page_ext(using memblock). 2. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). 3. initialize buddy. after this change: 1. allocated memory for page_ext(using memblock). 2. initialize buddy. 3. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). with the change, failure/dummy_handle can get its correct value and page owner output for example has the one for page owner itself: Page allocated via order 2, mask 0x6202c0(GFP_USER|__GFP_NOWARN), pid 1006, ts 67278156558 ns PFN 543776 type Unmovable Block 531 type Unmovable Flags 0x0() init_page_owner+0x28/0x2f8 invoke_init_callbacks_flatmem+0x24/0x34 start_kernel+0x33c/0x5d8 Link: https://lkml.kernel.org/r/1603104925-5888-1-git-send-email-zhenhuah@codeaurora.org Signed-off-by: Zhenhua Huang Acked-by: Vlastimil Babka Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index cfce186f0c4e..aff81ba31bd8 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -44,8 +44,12 @@ static inline void page_ext_init_flatmem(void) { } extern void page_ext_init(void); +static inline void page_ext_init_flatmem_late(void) +{ +} #else extern void page_ext_init_flatmem(void); +extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } @@ -76,6 +80,10 @@ static inline void page_ext_init(void) { } +static inline void page_ext_init_flatmem_late(void) +{ +} + static inline void page_ext_init_flatmem(void) { } -- cgit v1.2.3 From 57efa1fe5957694fa541c9062de0a127f0b9acb0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Dec 2020 19:05:44 -0800 Subject: mm/gup: prevent gup_fast from racing with COW during fork Since commit 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") pages under a FOLL_PIN will not be write protected during COW for fork. This means that pages returned from pin_user_pages(FOLL_WRITE) should not become write protected while the pin is active. However, there is a small race where get_user_pages_fast(FOLL_PIN) can establish a FOLL_PIN at the same time copy_present_page() is write protecting it: CPU 0 CPU 1 get_user_pages_fast() internal_get_user_pages_fast() copy_page_range() pte_alloc_map_lock() copy_present_page() atomic_read(has_pinned) == 0 page_maybe_dma_pinned() == false atomic_set(has_pinned, 1); gup_pgd_range() gup_pte_range() pte_t pte = gup_get_pte(ptep) pte_access_permitted(pte) try_grab_compound_head() pte = pte_wrprotect(pte) set_pte_at(); pte_unmap_unlock() // GUP now returns with a write protected page The first attempt to resolve this by using the write protect caused problems (and was missing a barrrier), see commit f3c64eda3e50 ("mm: avoid early COW write protect games during fork()") Instead wrap copy_p4d_range() with the write side of a seqcount and check the read side around gup_pgd_range(). If there is a collision then get_user_pages_fast() fails and falls back to slow GUP. Slow GUP is safe against this race because copy_page_range() is only called while holding the exclusive side of the mmap_lock on the src mm_struct. [akpm@linux-foundation.org: coding style fixes] Link: https://lore.kernel.org/r/CAHk-=wi=iCnYCARbPGjkVJu9eyYeZ13N64tZYLdOB8CP5Q_PLw@mail.gmail.com Link: https://lkml.kernel.org/r/2-v4-908497cf359a+4782-gup_fork_jgg@nvidia.com Fixes: f3c64eda3e50 ("mm: avoid early COW write protect games during fork()") Signed-off-by: Jason Gunthorpe Suggested-by: Linus Torvalds Reviewed-by: John Hubbard Reviewed-by: Jan Kara Reviewed-by: Peter Xu Acked-by: "Ahmed S. Darwish" [seqcount_t parts] Cc: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Leon Romanovsky Cc: Michal Hocko Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad..915f4f100383 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -446,6 +447,13 @@ struct mm_struct { */ atomic_t has_pinned; + /** + * @write_protect_seq: Locked when any thread is write + * protecting pages mapped by this mm to enforce a later COW, + * for instance during page table copying for fork(). + */ + seqcount_t write_protect_seq; + #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif -- cgit v1.2.3 From 52650c8b466bac399aec213c61d74bfe6f7af1a4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Dec 2020 19:05:48 -0800 Subject: mm/gup: remove the vma allocation from gup_longterm_locked() Long ago there wasn't a FOLL_LONGTERM flag so this DAX check was done by post-processing the VMA list. These days it is trivial to just check each VMA to see if it is DAX before processing it inside __get_user_pages() and return failure if a DAX VMA is encountered with FOLL_LONGTERM. Removing the allocation of the VMA list is a significant speed up for many call sites. Add an IS_ENABLED to vma_is_fsdax so that code generation is unchanged when DAX is compiled out. Remove the dummy version of __gup_longterm_locked() as !CONFIG_CMA already makes memalloc_nocma_save(), check_and_migrate_cma_pages(), and memalloc_nocma_restore() into a NOP. Link: https://lkml.kernel.org/r/0-v1-5551df3ed12e+b8-gup_dax_speedup_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: Ira Weiny Cc: Dan Williams Cc: John Hubbard Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..1fcc2b00582b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3230,7 +3230,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; - if (!vma->vm_file) + if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; -- cgit v1.2.3 From 462680946b6d982afdda3bf5f7de3c379cb8c97b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Dec 2020 19:06:11 -0800 Subject: mm: remove pagevec_lookup_range_nr_tag() With the merge of commit 2e1692966034 ("ceph: have ceph_writepages_start call pagevec_lookup_range_tag"), nothing calls this anymore. Link: https://lkml.kernel.org/r/20201021193926.101474-1-jlayton@kernel.org Signed-off-by: Jeff Layton Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 081d934eda64..ad4ddc17d403 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -43,9 +43,6 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); -unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, pgoff_t end, - xa_mark_t tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, xa_mark_t tag) { -- cgit v1.2.3 From 30e6a51dbb0594d79dc2a9543659c1d596e2f7d4 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Mon, 14 Dec 2020 19:06:14 -0800 Subject: mm/shmem.c: make shmem_mapping() inline shmem_mapping() isn't worth an out-of-line call from any callsite. So make it inline by - make shmem_aops global - export shmem_aops - inline the shmem_mapping() and replace the direct call 'shmem_aops' with shmem_mapping() in shmem.c. Link: https://lkml.kernel.org/r/20201115165207.GA265355@rlk Signed-off-by: Hui Su Acked-by: Vlastimil Babka Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a5a5d1d4d7b1..d82b6f396588 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -67,7 +67,11 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); #ifdef CONFIG_SHMEM -extern bool shmem_mapping(struct address_space *mapping); +extern const struct address_space_operations shmem_aops; +static inline bool shmem_mapping(struct address_space *mapping) +{ + return mapping->a_ops == &shmem_aops; +} #else static inline bool shmem_mapping(struct address_space *mapping) { -- cgit v1.2.3 From 1a984c4e8200e0e58bb316f14a4bebb28d32d15a Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 14 Dec 2020 19:06:24 -0800 Subject: mm: memcontrol: remove unused mod_memcg_obj_state() Since commit 991e7673859e ("mm: memcontrol: account kernel stack per node") there is no user of the mod_memcg_obj_state(). So just remove it. Also rework type of the idx parameter of the mod_objcg_state() from int to enum node_stat_item. Link: https://lkml.kernel.org/r/20201013153504.92602-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Roman Gushchin Acked-by: David Rientjes Reviewed-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Christopher Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Yafang Shao Cc: Chris Down Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 922a7f600465..28bf65560084 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -798,8 +798,6 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); -void mod_memcg_obj_state(void *p, int idx, int val); - static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) { @@ -1255,10 +1253,6 @@ static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, mod_node_page_state(page_pgdat(page), idx, val); } -static inline void mod_memcg_obj_state(void *p, int idx, int val) -{ -} - static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, -- cgit v1.2.3 From 013339df116c2ee0d796dd8bfb8f293a2030c063 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 14 Dec 2020 19:06:39 -0800 Subject: mm/rmap: always do TTU_IGNORE_ACCESS Since commit 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2"), the code to check the secondary MMU's page table access bit is broken for !(TTU_IGNORE_ACCESS) because the page is unmapped from the secondary MMU's page table before the check. More specifically for those secondary MMUs which unmap the memory in mmu_notifier_invalidate_range_start() like kvm. However memory reclaim is the only user of !(TTU_IGNORE_ACCESS) or the absence of TTU_IGNORE_ACCESS and it explicitly performs the page table access check before trying to unmap the page. So, at worst the reclaim will miss accesses in a very short window if we remove page table access check in unmapping code. There is an unintented consequence of !(TTU_IGNORE_ACCESS) for the memcg reclaim. From memcg reclaim the page_referenced() only account the accesses from the processes which are in the same memcg of the target page but the unmapping code is considering accesses from all the processes, so, decreasing the effectiveness of memcg reclaim. The simplest solution is to always assume TTU_IGNORE_ACCESS in unmapping code. Link: https://lkml.kernel.org/r/20201104231928.1494083-1-shakeelb@google.com Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2") Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Hugh Dickins Cc: Jerome Glisse Cc: Vlastimil Babka Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3a6adfa70fb0..70085ca1a3fc 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -91,7 +91,6 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ - TTU_IGNORE_ACCESS = 0x10, /* don't age */ TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will -- cgit v1.2.3 From a7cb874bfff785d39de6cc847673539cb3540821 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:45 -0800 Subject: mm: memcg: fix obsolete code comments This patch fixes/removes some obsolete comments in the code related to the kernel memory accounting: - kmem_cache->memcg_params.memcg_caches has been removed by commit 9855609bde03 ("mm: memcg/slab: use a single set of kmem_caches for all accounted allocations") - memcg->kmemcg_id is not used as a gate for kmem accounting since commit 0b8f73e10428 ("mm: memcontrol: clean up alloc, online, offline, free functions") Link: https://lkml.kernel.org/r/20201110184615.311974-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 28bf65560084..1f0f64fa3ccd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -296,7 +296,6 @@ struct mem_cgroup { int tcpmem_pressure; #ifdef CONFIG_MEMCG_KMEM - /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; enum memcg_kmem_state kmem_state; struct obj_cgroup __rcu *objcg; @@ -1562,9 +1561,8 @@ static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, } /* - * helper for accessing a memcg's index. It will be used as an index in the - * child cache array in kmem_cache, and also to derive its name. This function - * will return -1 when this is not a kmem-limited memcg. + * A helper for accessing memcg's kmem_id, used for getting + * corresponding LRU lists. */ static inline int memcg_cache_id(struct mem_cgroup *memcg) { -- cgit v1.2.3 From bef8620cd8e0a117c1a0719604052e424eb418f9 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:49 -0800 Subject: mm: memcg: deprecate the non-hierarchical mode Patch series "mm: memcg: deprecate cgroup v1 non-hierarchical mode", v1. The non-hierarchical cgroup v1 mode is a legacy of early days of the memory controller and doesn't bring any value today. However, it complicates the code and creates many edge cases all over the memory controller code. It's a good time to deprecate it completely. This patchset removes the internal logic, adjusts the user interface and updates the documentation. The alt patch removes some bits of the cgroup core code, which become obsolete. Michal Hocko said: "All that we know today is that we have a warning in place to complain loudly when somebody relies on use_hierarchy=0 with a deeper hierarchy. For all those years we have seen _zero_ reports that would describe a sensible usecase. Moreover we (SUSE) have backported this warning into old distribution kernels (since 3.0 based kernels) to extend the coverage and didn't hear even for users who adopt new kernels only very slowly. The only report we have seen so far was a LTP test suite which doesn't really reflect any real life usecase" This patch (of 3): The non-hierarchical cgroup v1 mode is a legacy of early days of the memory controller and doesn't bring any value today. However, it complicates the code and creates many edge cases all over the memory controller code. It's a good time to deprecate it completely. Functionally this patch enabled is by default for all cgroups and forbids switching it off. Nothing changes if cgroup v2 is used: hierarchical mode was enforced from scratch. To protect the ABI memory.use_hierarchy interface is preserved with a limited functionality: reading always returns "1", writing of "1" passes silently, writing of any other value fails with -EINVAL and a warning to dmesg (on the first occasion). Link: https://lkml.kernel.org/r/20201110220800.929549-1-guro@fb.com Link: https://lkml.kernel.org/r/20201110220800.929549-2-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Johannes Weiner Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f0f64fa3ccd..dd992b81bcb7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -234,11 +234,6 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; - /* - * Should the accounting and control be hierarchical, per subtree? - */ - bool use_hierarchy; - /* * Should the OOM killer kill all belonging tasks, had it kill one? */ @@ -588,8 +583,6 @@ static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, { if (root == memcg) return true; - if (!root->use_hierarchy) - return false; return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); } -- cgit v1.2.3 From 9d9d341df4d519d96e7927941d91f5785c5cea07 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:55 -0800 Subject: cgroup: remove obsoleted broken_hierarchy and warned_broken_hierarchy With the deprecation of the non-hierarchical mode of the memory controller there are no more examples of broken hierarchies left. Let's remove the cgroup core code which was supposed to print warnings about creating of broken hierarchies. Link: https://lkml.kernel.org/r/20201110220800.929549-4-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup-defs.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index fee0b5547cd0..559ee05f86b2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -668,21 +668,6 @@ struct cgroup_subsys { */ bool threaded:1; - /* - * If %false, this subsystem is properly hierarchical - - * configuration, resource accounting and restriction on a parent - * cgroup cover those of its children. If %true, hierarchy support - * is broken in some ways - some subsystems ignore hierarchy - * completely while others are only implemented half-way. - * - * It's now disallowed to create nested cgroups if the subsystem is - * broken and cgroup core will emit a warning message on such - * cases. Eventually, all subsystems will be made properly - * hierarchical and this will go away. - */ - bool broken_hierarchy:1; - bool warned_broken_hierarchy:1; - /* the following two fields are initialized automtically during boot */ int id; const char *name; -- cgit v1.2.3 From da3ceeff923e3bc750a8423c840462760c463926 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 14 Dec 2020 19:07:04 -0800 Subject: mm: memcg/slab: rename *_lruvec_slab_state to *_lruvec_kmem_state The *_lruvec_slab_state is also suitable for pages allocated from buddy, not just for the slab objects. But the function name seems to tell us that only slab object is applicable. So we can rename the keyword of slab to kmem. Link: https://lkml.kernel.org/r/20201117085249.24319-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index dd992b81bcb7..71c961452d9a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -788,15 +788,15 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); +void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); -static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, +static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); - __mod_lruvec_slab_state(p, idx, val); + __mod_lruvec_kmem_state(p, idx, val); local_irq_restore(flags); } @@ -1229,7 +1229,7 @@ static inline void mod_lruvec_page_state(struct page *page, mod_node_page_state(page_pgdat(page), idx, val); } -static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, +static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { struct page *page = virt_to_head_page(p); @@ -1237,7 +1237,7 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, __mod_node_page_state(page_pgdat(page), idx, val); } -static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, +static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { struct page *page = virt_to_head_page(p); @@ -1332,14 +1332,14 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } -static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx) +static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { - __mod_lruvec_slab_state(p, idx, 1); + __mod_lruvec_kmem_state(p, idx, 1); } -static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx) +static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) { - __mod_lruvec_slab_state(p, idx, -1); + __mod_lruvec_kmem_state(p, idx, -1); } /* idx can be of type enum memcg_stat_item or node_stat_item */ -- cgit v1.2.3 From c47d5032ed3002311a4188eae51f4641ec436beb Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 14 Dec 2020 19:07:14 -0800 Subject: mm: move lruvec stats update functions to vmstat.h Patch series "memcg: add pagetable comsumption to memory.stat", v2. Many workloads consumes significant amount of memory in pagetables. One specific use-case is the user space network driver which mmaps the application memory to provide zero copy transfer. This driver can consume a large amount memory in page tables. This patch series exposes the pagetable comsumption for each memory cgroup. This patch (of 2): This does not change any functionality and only move the functions which update the lruvec stats to vmstat.h from memcontrol.h. The main reason for this patch is to be able to use these functions in the page table contructor function which is defined in mm.h and we can not include the memcontrol.h in that file. Also this is a better place for this interface in general. The lruvec abstraction, while invented for memcg, isn't specific to memcg at all. Link: https://lkml.kernel.org/r/20201130212541.2781790-2-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 111 --------------------------------------------- include/linux/vmstat.h | 104 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 71c961452d9a..f530d634f055 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -786,8 +786,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val); void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, @@ -810,43 +808,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_state(lruvec, idx, val); - local_irq_restore(flags); -} - -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - struct page *head = compound_head(page); /* rmap on tail pages */ - pg_data_t *pgdat = page_pgdat(page); - struct lruvec *lruvec; - - /* Untracked pages have no memcg, no lruvec. Update only the node */ - if (!head->mem_cgroup) { - __mod_node_page_state(pgdat, idx, val); - return; - } - - lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); - __mod_lruvec_state(lruvec, idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_page_state(page, idx, val); - local_irq_restore(flags); -} - unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); @@ -1205,30 +1166,6 @@ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, { } -static inline void __mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(page_pgdat(page), idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - mod_node_page_state(page_pgdat(page), idx, val); -} - static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { @@ -1308,30 +1245,6 @@ static inline void __dec_memcg_page_state(struct page *page, __mod_memcg_page_state(page, idx, -1); } -static inline void __inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, 1); -} - -static inline void __dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, -1); -} - -static inline void __inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - __mod_lruvec_page_state(page, idx, 1); -} - -static inline void __dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - __mod_lruvec_page_state(page, idx, -1); -} - static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); @@ -1370,30 +1283,6 @@ static inline void dec_memcg_page_state(struct page *page, mod_memcg_page_state(page, idx, -1); } -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - -static inline void dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, -1); -} - -static inline void inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, 1); -} - -static inline void dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 322dcbfcc933..773135fc6e19 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -450,4 +450,108 @@ static inline const char *vm_event_name(enum vm_event_item item) } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ +#ifdef CONFIG_MEMCG + +void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val); + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_state(lruvec, idx, val); + local_irq_restore(flags); +} + +void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val); + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_page_state(page, idx, val); + local_irq_restore(flags); +} + +#else + +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(page_pgdat(page), idx, val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + mod_node_page_state(page_pgdat(page), idx, val); +} + +#endif /* CONFIG_MEMCG */ + +static inline void __inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, 1); +} + +static inline void __dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, -1); +} + +static inline void __inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, 1); +} + +static inline void __dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, -1); +} + +static inline void inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, 1); +} + +static inline void dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, -1); +} + +static inline void inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, 1); +} + +static inline void dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, -1); +} + #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From f0c0c115fb81940f4dba0644ac2a8a43b39c83f3 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 14 Dec 2020 19:07:17 -0800 Subject: mm: memcontrol: account pagetables per node For many workloads, pagetable consumption is significant and it makes sense to expose it in the memory.stat for the memory cgroups. However at the moment, the pagetables are accounted per-zone. Converting them to per-node and using the right interface will correctly account for the memory cgroups as well. [akpm@linux-foundation.org: export __mod_lruvec_page_state to modules for arch/mips/kvm/] Link: https://lkml.kernel.org/r/20201130212541.2781790-3-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++---- include/linux/mmzone.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..5bbbf4aeee94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2203,7 +2203,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page) if (!ptlock_init(page)) return false; __SetPageTable(page); - inc_zone_page_state(page, NR_PAGETABLE); + inc_lruvec_page_state(page, NR_PAGETABLE); return true; } @@ -2211,7 +2211,7 @@ static inline void pgtable_pte_page_dtor(struct page *page) { ptlock_free(page); __ClearPageTable(page); - dec_zone_page_state(page, NR_PAGETABLE); + dec_lruvec_page_state(page, NR_PAGETABLE); } #define pte_offset_map_lock(mm, pmd, address, ptlp) \ @@ -2298,7 +2298,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) if (!pmd_ptlock_init(page)) return false; __SetPageTable(page); - inc_zone_page_state(page, NR_PAGETABLE); + inc_lruvec_page_state(page, NR_PAGETABLE); return true; } @@ -2306,7 +2306,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) { pmd_ptlock_free(page); __ClearPageTable(page); - dec_zone_page_state(page, NR_PAGETABLE); + dec_lruvec_page_state(page, NR_PAGETABLE); } /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fb3bf696c05e..cca2a4443c9c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -152,7 +152,6 @@ enum zone_stat_item { NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_PAGETABLE, /* used for pagetables */ /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) @@ -207,6 +206,7 @@ enum node_stat_item { #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ #endif + NR_PAGETABLE, /* used for pagetables */ NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From d3f5ffcacd1528736471bc78f03f06da6c4551cc Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:07:45 -0800 Subject: mm: cleanup: remove unused tsk arg from __access_remote_vm Despite a comment that said that page fault accounting would be charged to whatever task_struct* was passed into __access_remote_vm(), the tsk argument was actually unused. Making page fault accounting actually use this task struct is quite a project, so there is no point in keeping the tsk argument. Delete both the comment, and the argument. [rppt@linux.ibm.com: changelog addition] Link: https://lkml.kernel.org/r/20201026074137.4147787-1-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Mike Rapoport Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5bbbf4aeee94..1d8e84bf718a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1716,8 +1716,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, unsigned int gup_flags); +extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, -- cgit v1.2.3 From 2b5067a8143e34aa3fa57a20fb8a3c40d905f942 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 14 Dec 2020 19:07:55 -0800 Subject: mm: mmap_lock: add tracepoints around lock acquisition The goal of these tracepoints is to be able to debug lock contention issues. This lock is acquired on most (all?) mmap / munmap / page fault operations, so a multi-threaded process which does a lot of these can experience significant contention. We trace just before we start acquisition, when the acquisition returns (whether it succeeded or not), and when the lock is released (or downgraded). The events are broken out by lock type (read / write). The events are also broken out by memcg path. For container-based workloads, users often think of several processes in a memcg as a single logical "task", so collecting statistics at this level is useful. The end goal is to get latency information. This isn't directly included in the trace events. Instead, users are expected to compute the time between "start locking" and "acquire returned", using e.g. synthetic events or BPF. The benefit we get from this is simpler code. Because we use tracepoint_enabled() to decide whether or not to trace, this patch has effectively no overhead unless tracepoints are enabled at runtime. If tracepoints are enabled, there is a performance impact, but how much depends on exactly what e.g. the BPF program does. [axelrasmussen@google.com: fix use-after-free race and css ref leak in tracepoints] Link: https://lkml.kernel.org/r/20201130233504.3725241-1-axelrasmussen@google.com [axelrasmussen@google.com: v3] Link: https://lkml.kernel.org/r/20201207213358.573750-1-axelrasmussen@google.com [rostedt@goodmis.org: in-depth examples of tracepoint_enabled() usage, and per-cpu-per-context buffer design] Link: https://lkml.kernel.org/r/20201105211739.568279-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Vlastimil Babka Cc: Steven Rostedt Cc: Ingo Molnar Cc: Michel Lespinasse Cc: Daniel Jordan Cc: Jann Horn Cc: Chinwen Chang Cc: Davidlohr Bueso Cc: David Rientjes Cc: Laurent Dufour Cc: Yafang Shao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmap_lock.h | 94 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 18e7eae9b5ba..0540f0156f58 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -1,11 +1,65 @@ #ifndef _LINUX_MMAP_LOCK_H #define _LINUX_MMAP_LOCK_H +#include +#include #include +#include +#include +#include #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), +DECLARE_TRACEPOINT(mmap_lock_start_locking); +DECLARE_TRACEPOINT(mmap_lock_acquire_returned); +DECLARE_TRACEPOINT(mmap_lock_released); + +#ifdef CONFIG_TRACING + +void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write); +void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, + bool success); +void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write); + +static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm, + bool write) +{ + if (tracepoint_enabled(mmap_lock_start_locking)) + __mmap_lock_do_trace_start_locking(mm, write); +} + +static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm, + bool write, bool success) +{ + if (tracepoint_enabled(mmap_lock_acquire_returned)) + __mmap_lock_do_trace_acquire_returned(mm, write, success); +} + +static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) +{ + if (tracepoint_enabled(mmap_lock_released)) + __mmap_lock_do_trace_released(mm, write); +} + +#else /* !CONFIG_TRACING */ + +static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm, + bool write) +{ +} + +static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm, + bool write, bool success) +{ +} + +static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) +{ +} + +#endif /* CONFIG_TRACING */ + static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); @@ -13,57 +67,86 @@ static inline void mmap_init_lock(struct mm_struct *mm) static inline void mmap_write_lock(struct mm_struct *mm) { + __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, true, true); } static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { + __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); + __mmap_lock_trace_acquire_returned(mm, true, true); } static inline int mmap_write_lock_killable(struct mm_struct *mm) { - return down_write_killable(&mm->mmap_lock); + int ret; + + __mmap_lock_trace_start_locking(mm, true); + ret = down_write_killable(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, true, ret == 0); + return ret; } static inline bool mmap_write_trylock(struct mm_struct *mm) { - return down_write_trylock(&mm->mmap_lock) != 0; + bool ret; + + __mmap_lock_trace_start_locking(mm, true); + ret = down_write_trylock(&mm->mmap_lock) != 0; + __mmap_lock_trace_acquire_returned(mm, true, ret); + return ret; } static inline void mmap_write_unlock(struct mm_struct *mm) { up_write(&mm->mmap_lock); + __mmap_lock_trace_released(mm, true); } static inline void mmap_write_downgrade(struct mm_struct *mm) { downgrade_write(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, false, true); } static inline void mmap_read_lock(struct mm_struct *mm) { + __mmap_lock_trace_start_locking(mm, false); down_read(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, false, true); } static inline int mmap_read_lock_killable(struct mm_struct *mm) { - return down_read_killable(&mm->mmap_lock); + int ret; + + __mmap_lock_trace_start_locking(mm, false); + ret = down_read_killable(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, false, ret == 0); + return ret; } static inline bool mmap_read_trylock(struct mm_struct *mm) { - return down_read_trylock(&mm->mmap_lock) != 0; + bool ret; + + __mmap_lock_trace_start_locking(mm, false); + ret = down_read_trylock(&mm->mmap_lock) != 0; + __mmap_lock_trace_acquire_returned(mm, false, ret); + return ret; } static inline void mmap_read_unlock(struct mm_struct *mm) { up_read(&mm->mmap_lock); + __mmap_lock_trace_released(mm, false); } static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) { - if (down_read_trylock(&mm->mmap_lock)) { + if (mmap_read_trylock(mm)) { rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); return true; } @@ -73,6 +156,7 @@ static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { up_read_non_owner(&mm->mmap_lock); + __mmap_lock_trace_released(mm, false); } static inline void mmap_assert_locked(struct mm_struct *mm) -- cgit v1.2.3 From 0966aeb404e854e3377a10fcd01be46f19055bc6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Dec 2020 19:08:02 -0800 Subject: mm: move free_unref_page to mm/internal.h Code outside mm/ should not be calling free_unref_page(). Also move free_unref_page_list(). Link: https://lkml.kernel.org/r/20201125034655.27687-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c603237e006c..6e479e9c48ce 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -580,8 +580,6 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_unref_page(struct page *page); -extern void free_unref_page_list(struct list_head *list); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); -- cgit v1.2.3 From cd544fd1dc9293c6702fab6effa63dac1cc67e99 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 14 Dec 2020 19:08:13 -0800 Subject: mremap: don't allow MREMAP_DONTUNMAP on special_mappings and aio As kernel expect to see only one of such mappings, any further operations on the VMA-copy may be unexpected by the kernel. Maybe it's being on the safe side, but there doesn't seem to be any expected use-case for this, so restrict it now. Link: https://lkml.kernel.org/r/20201013013416.390574-4-dima@arista.com Fixes: commit e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()") Signed-off-by: Dmitry Safonov Cc: Alexander Viro Cc: Andy Lutomirski Cc: Brian Geffon Cc: Catalin Marinas Cc: Dan Carpenter Cc: Dan Williams Cc: Dave Jiang Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Minchan Kim Cc: Ralph Campbell Cc: Russell King Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vishal Verma Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1d8e84bf718a..dacc889744b1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -558,7 +558,7 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*split)(struct vm_area_struct * area, unsigned long addr); - int (*mremap)(struct vm_area_struct * area); + int (*mremap)(struct vm_area_struct *area, unsigned long flags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); -- cgit v1.2.3 From dd3b614f858d88f33e0cf8b7353e2ad937e71da3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 14 Dec 2020 19:08:17 -0800 Subject: vm_ops: rename .split() callback to .may_split() Rename the callback to reflect that it's not called *on* or *after* split, but rather some time before the splitting to check if it's possible. Link: https://lkml.kernel.org/r/20201013013416.390574-5-dima@arista.com Signed-off-by: Dmitry Safonov Cc: Alexander Viro Cc: Andy Lutomirski Cc: Brian Geffon Cc: Catalin Marinas Cc: Dan Carpenter Cc: Dan Williams Cc: Dave Jiang Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Minchan Kim Cc: Ralph Campbell Cc: Russell King Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vishal Verma Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dacc889744b1..5d188b8611dc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -557,7 +557,8 @@ enum page_entry_size { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - int (*split)(struct vm_area_struct * area, unsigned long addr); + /* Called any time before splitting to check if it's allowed */ + int (*may_split)(struct vm_area_struct *area, unsigned long addr); int (*mremap)(struct vm_area_struct *area, unsigned long flags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, -- cgit v1.2.3 From 95d6c701f4ca7c44dc148d664f604541266a2333 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 14 Dec 2020 19:08:34 -0800 Subject: mm: extract might_alloc() debug check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracted from slab.h, which seems to have the most complete version including the correct might_sleep() check. Roll it out to slob.c. Motivated by a discussion with Paul about possibly changing call_rcu behaviour to allocate memory, but only roughly every 500th call. There are a lot fewer places in the kernel that care about whether allocating memory is allowed or not (due to deadlocks with reclaim code) than places that care whether sleeping is allowed. But debugging these also tends to be a lot harder, so nice descriptive checks could come in handy. I might have some use eventually for annotations in drivers/gpu. Note that unlike fs_reclaim_acquire/release gfpflags_allow_blocking does not consult the PF_MEMALLOC flags. But there is no flag equivalent for GFP_NOWAIT, hence this check can't go wrong due to memalloc_no*_save/restore contexts. Willy is working on a patch series which might change this: https://lore.kernel.org/linux-mm/20200625113122.7540-7-willy@infradead.org/ I think best would be if that updates gfpflags_allow_blocking(), since there's a ton of callers all over the place for that already. Link: https://lkml.kernel.org/r/20201125162532.1299794-3-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter Acked-by: Vlastimil Babka Acked-by: Paul E. McKenney Reviewed-by: Jason Gunthorpe Cc: Randy Dunlap Cc: Paul E. McKenney Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Vlastimil Babka Cc: Mathieu Desnoyers Cc: Sebastian Andrzej Siewior Cc: Michel Lespinasse Cc: Daniel Vetter Cc: Waiman Long Cc: Thomas Gleixner Cc: Randy Dunlap Cc: Dave Chinner Cc: Qian Cai Cc: "Matthew Wilcox (Oracle)" Cc: Christian König Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Maarten Lankhorst Cc: Thomas Hellström (Intel) Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d5ece7a9a403..a11a61b5226f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -180,6 +180,22 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif +/** + * might_alloc - Mark possible allocation sites + * @gfp_mask: gfp_t flags that would be used to allocate + * + * Similar to might_sleep() and other annotations, this can be used in functions + * that might allocate, but often don't. Compiles to nothing without + * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking. + */ +static inline void might_alloc(gfp_t gfp_mask) +{ + fs_reclaim_acquire(gfp_mask); + fs_reclaim_release(gfp_mask); + + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); +} + /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * -- cgit v1.2.3 From 96e2db456135db0cf2476b6890f1e8b2fdcf21eb Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 14 Dec 2020 19:08:49 -0800 Subject: mm/vmalloc: rework the drain logic A current "lazy drain" model suffers from at least two issues. First one is related to the unsorted list of vmap areas, thus in order to identify the [min:max] range of areas to be drained, it requires a full list scan. What is a time consuming if the list is too long. Second one and as a next step is about merging all fragments with a free space. What is also a time consuming because it has to iterate over entire list which holds outstanding lazy areas. See below the "preemptirqsoff" tracer that illustrates a high latency. It is ~24676us. Our workloads like audio and video are effected by such long latency: tracer: preemptirqsoff preemptirqsoff latency trace v1.1.5 on 4.9.186-perf+ -------------------------------------------------------------------- latency: 24676 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 P:8) ----------------- | task: crtc_commit:112-261 (uid:0 nice:0 policy:1 rt_prio:16) ----------------- => started at: __purge_vmap_area_lazy => ended at: __purge_vmap_area_lazy _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / delay cmd pid ||||| time | caller \ / ||||| \ | / crtc_com-261 1...1 1us*: _raw_spin_lock <-__purge_vmap_area_lazy [...] crtc_com-261 1...1 24675us : _raw_spin_unlock <-__purge_vmap_area_lazy crtc_com-261 1...1 24677us : trace_preempt_on <-__purge_vmap_area_lazy crtc_com-261 1...1 24683us : => free_vmap_area_noflush => remove_vm_area => __vunmap => vfree => drm_property_free_blob => drm_mode_object_unreference => drm_property_unreference_blob => __drm_atomic_helper_crtc_destroy_state => sde_crtc_destroy_state => drm_atomic_state_default_clear => drm_atomic_state_clear => drm_atomic_state_free => complete_commit => _msm_drm_commit_work_cb => kthread_worker_fn => kthread => ret_from_fork To address those two issues we can redesign a purging of the outstanding lazy areas. Instead of queuing vmap areas to the list, we replace it by the separate rb-tree. In hat case an area is located in the tree/list in ascending order. It will give us below advantages: a) Outstanding vmap areas are merged creating bigger coalesced blocks, thus it becomes less fragmented. b) It is possible to calculate a flush range [min:max] without scanning all elements. It is O(1) access time or complexity; c) The final merge of areas with the rb-tree that represents a free space is faster because of (a). As a result the lock contention is also reduced. Link: https://lkml.kernel.org/r/20201116220033.1837-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Hillf Danton Cc: Michal Hocko Cc: Matthew Wilcox Cc: Oleksiy Avramchenko Cc: Steven Rostedt Cc: Minchan Kim Cc: huang ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 938eaf9517e2..80c0181c411d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -72,16 +72,14 @@ struct vmap_area { struct list_head list; /* address sorted list */ /* - * The following three variables can be packed, because - * a vmap_area object is always one of the three states: + * The following two variables can be packed, because + * a vmap_area object can be either: * 1) in "free" tree (root is vmap_area_root) - * 2) in "busy" tree (root is free_vmap_area_root) - * 3) in purge list (head is vmap_purge_list) + * 2) or "busy" tree (root is free_vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ - struct llist_node purge_list; /* in purge list */ }; }; -- cgit v1.2.3 From 03e92a5e097d679acbd1fb4d2ae238a38158aa0b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:09:32 -0800 Subject: ia64: remove custom __early_pfn_to_nid() The ia64 implementation of __early_pfn_to_nid() essentially relies on the same data as the generic implementation. The correspondence between memory ranges and nodes is set in memblock during early memory initialization in register_active_ranges() function. The initialization of sparsemem that requires early_pfn_to_nid() happens later and it can use the memblock information like the other architectures. Link: https://lkml.kernel.org/r/20201101170454.9567-3-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Alexey Dobriyan Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Matt Turner Cc: Meelis Roos Cc: Michael Schmitz Cc: Russell King Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 --- include/linux/mmzone.h | 11 ----------- 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5d188b8611dc..48fa3e71be1a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2434,9 +2434,6 @@ static inline int early_pfn_to_nid(unsigned long pfn) #else /* please see mm/page_alloc.c */ extern int __meminit early_pfn_to_nid(unsigned long pfn); -/* there is a per-arch backend function. */ -extern int __meminit __early_pfn_to_nid(unsigned long pfn, - struct mminit_pfnnid_cache *state); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cca2a4443c9c..16fb5522a74f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1428,17 +1428,6 @@ void sparse_init(void); #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ -/* - * During memory init memblocks map pfns to nids. The search is expensive and - * this caches recent lookups. The implementation of __early_pfn_to_nid - * may treat start/end as pfns or sections. - */ -struct mminit_pfnnid_cache { - unsigned long last_start; - unsigned long last_end; - int last_nid; -}; - /* * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we * need to check pfn validity within that MAX_ORDER_NR_PAGES block. -- cgit v1.2.3 From 5e545df3292fbd3d5963c68980f1527ead2a2b3f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:09:55 -0800 Subject: arm: remove CONFIG_ARCH_HAS_HOLES_MEMORYMODEL ARM is the only architecture that defines CONFIG_ARCH_HAS_HOLES_MEMORYMODEL which in turn enables memmap_valid_within() function that is intended to verify existence of struct page associated with a pfn when there are holes in the memory map. However, the ARCH_HAS_HOLES_MEMORYMODEL also enables HAVE_ARCH_PFN_VALID and arch-specific pfn_valid() implementation that also deals with the holes in the memory map. The only two users of memmap_valid_within() call this function after a call to pfn_valid() so the memmap_valid_within() check becomes redundant. Remove CONFIG_ARCH_HAS_HOLES_MEMORYMODEL and memmap_valid_within() and rely entirely on ARM's implementation of pfn_valid() that is now enabled unconditionally. Link: https://lkml.kernel.org/r/20201101170454.9567-9-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Alexey Dobriyan Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Matt Turner Cc: Meelis Roos Cc: Michael Schmitz Cc: Russell King Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 16fb5522a74f..6e0025b5a88f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1440,37 +1440,6 @@ void sparse_init(void); #define pfn_valid_within(pfn) (1) #endif -#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL -/* - * pfn_valid() is meant to be able to tell if a given PFN has valid memmap - * associated with it or not. This means that a struct page exists for this - * pfn. The caller cannot assume the page is fully initialized in general. - * Hotplugable pages might not have been onlined yet. pfn_to_online_page() - * will ensure the struct page is fully online and initialized. Special pages - * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly. - * - * In FLATMEM, it is expected that holes always have valid memmap as long as - * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed - * that a valid section has a memmap for the entire section. - * - * However, an ARM, and maybe other embedded architectures in the future - * free memmap backing holes to save memory on the assumption the memmap is - * never used. The page_zone linkages are then broken even though pfn_valid() - * returns true. A walker of the full memmap must then do this additional - * check to ensure the memmap they are looking at is sane by making sure - * the zone and PFN linkages are still valid. This is expensive, but walkers - * of the full memmap are extremely rare. - */ -bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone); -#else -static inline bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone) -{ - return true; -} -#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ - #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ -- cgit v1.2.3 From 77bc7fd607dee2ffb28daff6d0dd8ae42af61ea8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:20 -0800 Subject: mm: introduce debug_pagealloc_{map,unmap}_pages() helpers Patch series "arch, mm: improve robustness of direct map manipulation", v7. During recent discussion about KVM protected memory, David raised a concern about usage of __kernel_map_pages() outside of DEBUG_PAGEALLOC scope [1]. Indeed, for architectures that define CONFIG_ARCH_HAS_SET_DIRECT_MAP it is possible that __kernel_map_pages() would fail, but since this function is void, the failure will go unnoticed. Moreover, there's lack of consistency of __kernel_map_pages() semantics across architectures as some guard this function with #ifdef DEBUG_PAGEALLOC, some refuse to update the direct map if page allocation debugging is disabled at run time and some allow modifying the direct map regardless of DEBUG_PAGEALLOC settings. This set straightens this out by restoring dependency of __kernel_map_pages() on DEBUG_PAGEALLOC and updating the call sites accordingly. Since currently the only user of __kernel_map_pages() outside DEBUG_PAGEALLOC is hibernation, it is updated to make direct map accesses there more explicit. [1] https://lore.kernel.org/lkml/2759b4bf-e1e3-d006-7d86-78a40348269d@redhat.com This patch (of 4): When CONFIG_DEBUG_PAGEALLOC is enabled, it unmaps pages from the kernel direct mapping after free_pages(). The pages than need to be mapped back before they could be used. Theese mapping operations use __kernel_map_pages() guarded with with debug_pagealloc_enabled(). The only place that calls __kernel_map_pages() without checking whether DEBUG_PAGEALLOC is enabled is the hibernation code that presumes availability of this function when ARCH_HAS_SET_DIRECT_MAP is set. Still, on arm64, __kernel_map_pages() will bail out when DEBUG_PAGEALLOC is not enabled but set_direct_map_invalid_noflush() may render some pages not present in the direct map and hibernation code won't be able to save such pages. To make page allocation debugging and hibernation interaction more robust, the dependency on DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP has to be made more explicit. Start with combining the guard condition and the call to __kernel_map_pages() into debug_pagealloc_map_pages() and debug_pagealloc_unmap_pages() functions to emphasize that __kernel_map_pages() should not be called without DEBUG_PAGEALLOC and use these new functions to map/unmap pages when page allocation debugging is enabled. Link: https://lkml.kernel.org/r/20201109192128.960-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20201109192128.960-2-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: "David S. Miller" Cc: Dave Hansen Cc: David Rientjes Cc: "Edgecombe, Rick P" Cc: "H. Peter Anvin" Cc: Heiko Carstens Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 48fa3e71be1a..0f4c34672a13 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2943,12 +2943,27 @@ kernel_map_pages(struct page *page, int numpages, int enable) { __kernel_map_pages(page, numpages, enable); } + +static inline void debug_pagealloc_map_pages(struct page *page, int numpages) +{ + if (debug_pagealloc_enabled_static()) + __kernel_map_pages(page, numpages, 1); +} + +static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) +{ + if (debug_pagealloc_enabled_static()) + __kernel_map_pages(page, numpages, 0); +} + #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ #else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} +static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} +static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ -- cgit v1.2.3 From 2abf962a8d42b32f5ffeb827826290b799c85f86 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:25 -0800 Subject: PM: hibernate: make direct map manipulations more explicit When DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP is enabled a page may be not present in the direct map and has to be explicitly mapped before it could be copied. Introduce hibernate_map_page() and hibernation_unmap_page() that will explicitly use set_direct_map_{default,invalid}_noflush() for ARCH_HAS_SET_DIRECT_MAP case and debug_pagealloc_{map,unmap}_pages() for DEBUG_PAGEALLOC case. The remapping of the pages in safe_copy_page() presumes that it only changes protection bits in an existing PTE and so it is safe to ignore return value of set_direct_map_{default,invalid}_noflush(). Still, add a pr_warn() so that future changes in set_memory APIs will not silently break hibernation. Link: https://lkml.kernel.org/r/20201109192128.960-3-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Rafael J. Wysocki Reviewed-by: David Hildenbrand Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0f4c34672a13..6b5df31387b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2934,16 +2934,6 @@ static inline bool debug_pagealloc_enabled_static(void) #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) extern void __kernel_map_pages(struct page *page, int numpages, int enable); -/* - * When called in DEBUG_PAGEALLOC context, the call should most likely be - * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static() - */ -static inline void -kernel_map_pages(struct page *page, int numpages, int enable) -{ - __kernel_map_pages(page, numpages, enable); -} - static inline void debug_pagealloc_map_pages(struct page *page, int numpages) { if (debug_pagealloc_enabled_static()) @@ -2960,8 +2950,6 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ #else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ -static inline void -kernel_map_pages(struct page *page, int numpages, int enable) {} static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From 5d6ad668f31625c6aa9ed8dc3bdb29561d2b1144 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:30 -0800 Subject: arch, mm: restore dependency of __kernel_map_pages() on DEBUG_PAGEALLOC The design of DEBUG_PAGEALLOC presumes that __kernel_map_pages() must never fail. With this assumption is wouldn't be safe to allow general usage of this function. Moreover, some architectures that implement __kernel_map_pages() have this function guarded by #ifdef DEBUG_PAGEALLOC and some refuse to map/unmap pages when page allocation debugging is disabled at runtime. As all the users of __kernel_map_pages() were converted to use debug_pagealloc_map_pages() it is safe to make it available only when DEBUG_PAGEALLOC is set. Link: https://lkml.kernel.org/r/20201109192128.960-4-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Acked-by: Kirill A. Shutemov Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b5df31387b5..7a67bdf3df5e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2931,7 +2931,11 @@ static inline bool debug_pagealloc_enabled_static(void) return static_branch_unlikely(&_debug_pagealloc_enabled); } -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) +#ifdef CONFIG_DEBUG_PAGEALLOC +/* + * To support DEBUG_PAGEALLOC architecture must ensure that + * __kernel_map_pages() never fails + */ extern void __kernel_map_pages(struct page *page, int numpages, int enable); static inline void debug_pagealloc_map_pages(struct page *page, int numpages) @@ -2949,13 +2953,13 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ -#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +#else /* CONFIG_DEBUG_PAGEALLOC */ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -- cgit v1.2.3 From 32a0de886eb3cb7e6990da27a9cdfa50baa8be64 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:35 -0800 Subject: arch, mm: make kernel_page_present() always available For architectures that enable ARCH_HAS_SET_MEMORY having the ability to verify that a page is mapped in the kernel direct map can be useful regardless of hibernation. Add RISC-V implementation of kernel_page_present(), update its forward declarations and stubs to be a part of set_memory API and remove ugly ifdefery in inlcude/linux/mm.h around current declarations of kernel_page_present(). Link: https://lkml.kernel.org/r/20201109192128.960-5-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Kirill A. Shutemov Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Hildenbrand Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 ------- include/linux/set_memory.h | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7a67bdf3df5e..5ec79757eeae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2949,16 +2949,9 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) if (debug_pagealloc_enabled_static()) __kernel_map_pages(page, numpages, 0); } - -#ifdef CONFIG_HIBERNATION -extern bool kernel_page_present(struct page *page); -#endif /* CONFIG_HIBERNATION */ #else /* CONFIG_DEBUG_PAGEALLOC */ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} -#ifdef CONFIG_HIBERNATION -static inline bool kernel_page_present(struct page *page) { return true; } -#endif /* CONFIG_HIBERNATION */ #endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 860e0f843c12..fe1aa4e54680 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -23,6 +23,11 @@ static inline int set_direct_map_default_noflush(struct page *page) { return 0; } + +static inline bool kernel_page_present(struct page *page) +{ + return true; +} #endif #ifndef set_mce_nospec -- cgit v1.2.3 From 952eaf815925f106eb6b68346b3458a68bb18ec1 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:10:53 -0800 Subject: mm, page_alloc: cache pageset high and batch in struct zone All per-cpu pagesets for a zone use the same high and batch values, that are duplicated there just for performance (locality) reasons. This patch adds the same variables also to struct zone as a shared copy. This will be useful later for making possible to disable pcplists temporarily by setting high value to 0, while remembering the values for restoring them later. But we can also immediately benefit from not updating pagesets of all possible cpus in case the newly recalculated values (after sysctl change or memory online/offline) are actually unchanged from the previous ones. Link: https://lkml.kernel.org/r/20201111092812.11329-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e0025b5a88f..4eee08b0062b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -470,6 +470,12 @@ struct zone { #endif struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; + /* + * the high and batch values are copied to individual pagesets for + * faster access + */ + int pageset_high; + int pageset_batch; #ifndef CONFIG_SPARSEMEM /* -- cgit v1.2.3 From 2ee08717da50160c20056f6d6b76afdf65db33ab Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 14 Dec 2020 19:11:02 -0800 Subject: include/linux/page-flags.h: remove unused __[Set|Clear]PagePrivate They are not used anymore. Link: https://lkml.kernel.org/r/20201009135914.64826-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4f6ba9379112..50cbf5e931bc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -363,8 +363,7 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) * for its own purposes. * - PG_private and PG_private_2 cause releasepage() and co to be invoked */ -PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY) - __CLEARPAGEFLAG(Private, private, PF_ANY) +PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) -- cgit v1.2.3 From 3b12da6d1d4adff087939c071e0d74a7857439a0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Dec 2020 19:11:05 -0800 Subject: mm/page-flags: fix comment We haven't had 'dontuse' flags since 2002. Replace this obsolete warning with a hopefully more useful one. Link: https://lkml.kernel.org/r/20201027025823.3704-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 50cbf5e931bc..c1368af622c7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,8 +86,7 @@ */ /* - * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break - * locked- and dirty-page accounting. + * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which -- cgit v1.2.3 From ebfe1b8f6ea5d83d8c1aa18ddd8ede432a7414e7 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 14 Dec 2020 19:11:58 -0800 Subject: include/linux/huge_mm.h: remove extern keyword The external function definitions don't need the "extern" keyword. Remove them so future changes don't copy the function definition style. Link: https://lkml.kernel.org/r/20201106235135.32109-1-rcampbell@nvidia.com Signed-off-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 93 ++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0365aa97f8e7..6a19f35f836b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -7,43 +7,37 @@ #include /* only for vma_is_dax() */ -extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); -extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, - struct vm_area_struct *vma); -extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); -extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pud_t *dst_pud, pud_t *src_pud, unsigned long addr, - struct vm_area_struct *vma); +vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); +int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, + struct vm_area_struct *vma); +void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); +int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pud_t *dst_pud, pud_t *src_pud, unsigned long addr, + struct vm_area_struct *vma); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); +void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); #else static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) { } #endif -extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); -extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, - unsigned long addr, - pmd_t *pmd, - unsigned int flags); -extern bool madvise_free_huge_pmd(struct mmu_gather *tlb, - struct vm_area_struct *vma, - pmd_t *pmd, unsigned long addr, unsigned long next); -extern int zap_huge_pmd(struct mmu_gather *tlb, - struct vm_area_struct *vma, - pmd_t *pmd, unsigned long addr); -extern int zap_huge_pud(struct mmu_gather *tlb, - struct vm_area_struct *vma, - pud_t *pud, unsigned long addr); -extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, - pmd_t *old_pmd, pmd_t *new_pmd); -extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, pgprot_t newprot, - unsigned long cp_flags); +vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); +struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd, + unsigned int flags); +bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long addr, unsigned long next); +int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr); +int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, + unsigned long addr); +bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, + unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); +int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, + pgprot_t newprot, unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, pgprot_t pgprot, bool write); @@ -100,13 +94,13 @@ enum transparent_hugepage_flag { struct kobject; struct kobj_attribute; -extern ssize_t single_hugepage_flag_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count, - enum transparent_hugepage_flag flag); -extern ssize_t single_hugepage_flag_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf, - enum transparent_hugepage_flag flag); +ssize_t single_hugepage_flag_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count, + enum transparent_hugepage_flag flag); +ssize_t single_hugepage_flag_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf, + enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) @@ -179,12 +173,11 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, (transparent_hugepage_flags & \ (1< Date: Mon, 14 Dec 2020 19:12:46 -0800 Subject: mm/compaction: make defer_compaction and compaction_deferred static defer_compaction() and compaction_deferred() and compaction_restarting() in mm/compaction.c won't be used in other files, so make them static, and remove the declaration in the header file. Take the chance to fix a typo. Link: https://lkml.kernel.org/r/20201123170801.GA9625@rlk Signed-off-by: Hui Su Acked-by: Vlastimil Babka Cc: Nitin Gupta Cc: Baoquan He Cc: Mateusz Nosek Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 1de5a1151ee7..ed4070ed41ef 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -98,11 +98,8 @@ extern void reset_isolation_suitable(pg_data_t *pgdat); extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int highest_zoneidx); -extern void defer_compaction(struct zone *zone, int order); -extern bool compaction_deferred(struct zone *zone, int order); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); -extern bool compaction_restarting(struct zone *zone, int order); /* Compaction has made some progress and retrying makes sense */ static inline bool compaction_made_progress(enum compact_result result) @@ -194,15 +191,6 @@ static inline enum compact_result compaction_suitable(struct zone *zone, int ord return COMPACT_SKIPPED; } -static inline void defer_compaction(struct zone *zone, int order) -{ -} - -static inline bool compaction_deferred(struct zone *zone, int order) -{ - return true; -} - static inline bool compaction_made_progress(enum compact_result result) { return false; -- cgit v1.2.3 From 0060ef3b4e6dd1410da164d48a595eadb2fb02f7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Dec 2020 19:12:59 -0800 Subject: mm: support THPs in zero_user_segments We can only kmap() one subpage of a THP at a time, so loop over all relevant subpages, skipping ones which don't need to be zeroed. This is too large to inline when THPs are enabled and we actually need highmem, so put it in highmem.c. [willy@infradead.org: start1 was allowed to be less than start2] Link: https://lkml.kernel.org/r/20201124041507.28996-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Yang Shi Cc: Jan Kara Cc: Michal Hocko Cc: Zi Yan Cc: Song Liu Cc: Mel Gorman Cc: Naresh Kamboju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 14e6202ce47f..8e21fe82b3a3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -284,13 +284,22 @@ static inline void clear_highpage(struct page *page) kunmap_atomic(kaddr); } +/* + * If we pass in a base or tail page, we can zero up to PAGE_SIZE. + * If we pass in a head page, we can zero up to the size of the compound page. + */ +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +void zero_user_segments(struct page *page, unsigned start1, unsigned end1, + unsigned start2, unsigned end2); +#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ static inline void zero_user_segments(struct page *page, - unsigned start1, unsigned end1, - unsigned start2, unsigned end2) + unsigned start1, unsigned end1, + unsigned start2, unsigned end2) { void *kaddr = kmap_atomic(page); + unsigned int i; - BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE); + BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); @@ -299,8 +308,10 @@ static inline void zero_user_segments(struct page *page, memset(kaddr + start2, 0, end2 - start2); kunmap_atomic(kaddr); - flush_dcache_page(page); + for (i = 0; i < compound_nr(page); i++) + flush_dcache_page(page + i); } +#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) -- cgit v1.2.3 From 236c32eb109696590b7428957eda50cc05e22af8 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 14 Dec 2020 19:13:13 -0800 Subject: mm: migrate: clean up migrate_prep{_local} The migrate_prep{_local} never fails, so it is pointless to have return value and check the return value. Link: https://lkml.kernel.org/r/20201113205359.556831-5-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Zi Yan Cc: Jan Kara Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 0f8d1583fa8e..4594838a0f7c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -45,8 +45,8 @@ extern struct page *alloc_migration_target(struct page *page, unsigned long priv extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); -extern int migrate_prep(void); -extern int migrate_prep_local(void); +extern void migrate_prep(void); +extern void migrate_prep_local(void); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, -- cgit v1.2.3 From 04013513cc84c401c7de9023ff3eda7863fc4add Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:30 -0800 Subject: mm, page_alloc: do not rely on the order of page_poison and init_on_alloc/free parameters Patch series "cleanup page poisoning", v3. I have identified a number of issues and opportunities for cleanup with CONFIG_PAGE_POISON and friends: - interaction with init_on_alloc and init_on_free parameters depends on the order of parameters (Patch 1) - the boot time enabling uses static key, but inefficienty (Patch 2) - sanity checking is incompatible with hibernation (Patch 3) - CONFIG_PAGE_POISONING_NO_SANITY can be removed now that we have init_on_free (Patch 4) - CONFIG_PAGE_POISONING_ZERO can be most likely removed now that we have init_on_free (Patch 5) This patch (of 5): Enabling page_poison=1 together with init_on_alloc=1 or init_on_free=1 produces a warning in dmesg that page_poison takes precedence. However, as these warnings are printed in early_param handlers for init_on_alloc/free, they are not printed if page_poison is enabled later on the command line (handlers are called in the order of their parameters), or when init_on_alloc/free is always enabled by the respective config option - before the page_poison early param handler is called, it is not considered to be enabled. This is inconsistent. We can remove the dependency on order by making the init_on_* parameters only set a boolean variable, and postponing the evaluation after all early params have been processed. Introduce a new init_mem_debugging_and_hardening() function for that, and move the related debug_pagealloc processing there as well. As a result init_mem_debugging_and_hardening() knows always accurately if init_on_* and/or page_poison options were enabled. Thus we can also optimize want_init_on_alloc() and want_init_on_free(). We don't need to check page_poisoning_enabled() there, we can instead not enable the init_on_* static keys at all, if page poisoning is enabled. This results in a simpler and more effective code. Link: https://lkml.kernel.org/r/20201113104033.22907-1-vbabka@suse.cz Link: https://lkml.kernel.org/r/20201113104033.22907-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Michal Hocko Cc: Mateusz Nosek Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5ec79757eeae..591c784277ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2872,6 +2872,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern void init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); @@ -2881,35 +2882,20 @@ static inline void kernel_poison_pages(struct page *page, int numpages, int enable) { } #endif -#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON -DECLARE_STATIC_KEY_TRUE(init_on_alloc); -#else DECLARE_STATIC_KEY_FALSE(init_on_alloc); -#endif static inline bool want_init_on_alloc(gfp_t flags) { - if (static_branch_unlikely(&init_on_alloc) && - !page_poisoning_enabled()) + if (static_branch_unlikely(&init_on_alloc)) return true; return flags & __GFP_ZERO; } -#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON -DECLARE_STATIC_KEY_TRUE(init_on_free); -#else DECLARE_STATIC_KEY_FALSE(init_on_free); -#endif static inline bool want_init_on_free(void) { - return static_branch_unlikely(&init_on_free) && - !page_poisoning_enabled(); + return static_branch_unlikely(&init_on_free); } -#ifdef CONFIG_DEBUG_PAGEALLOC -extern void init_debug_pagealloc(void); -#else -static inline void init_debug_pagealloc(void) {} -#endif extern bool _debug_pagealloc_enabled_early; DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); -- cgit v1.2.3 From 8db26a3d47354ce7271a8cab03cd65b9d3d610b9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:34 -0800 Subject: mm, page_poison: use static key more efficiently Commit 11c9c7edae06 ("mm/page_poison.c: replace bool variable with static key") changed page_poisoning_enabled() to a static key check. However, the function is not inlined, so each check still involves a function call with overhead not eliminated when page poisoning is disabled. Analogically to how debug_pagealloc is handled, this patch converts page_poisoning_enabled() back to boolean check, and introduces page_poisoning_enabled_static() for fast paths. Both functions are inlined. The function kernel_poison_pages() is also called unconditionally and does the static key check inside. Remove it from there and put it to callers. Also split it to two functions kernel_poison_pages() and kernel_unpoison_pages() instead of the confusing bool parameter. Also optimize the check that enables page poisoning instead of debug_pagealloc for architectures without proper debug_pagealloc support. Move the check to init_mem_debugging_and_hardening() to enable a single static key instead of having two static branches in page_poisoning_enabled_static(). Link: https://lkml.kernel.org/r/20201113104033.22907-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 591c784277ea..026707a58159 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2874,12 +2874,37 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, extern void init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING -extern bool page_poisoning_enabled(void); -extern void kernel_poison_pages(struct page *page, int numpages, int enable); +extern void __kernel_poison_pages(struct page *page, int numpages); +extern void __kernel_unpoison_pages(struct page *page, int numpages); +extern bool _page_poisoning_enabled_early; +DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled); +static inline bool page_poisoning_enabled(void) +{ + return _page_poisoning_enabled_early; +} +/* + * For use in fast paths after init_mem_debugging() has run, or when a + * false negative result is not harmful when called too early. + */ +static inline bool page_poisoning_enabled_static(void) +{ + return static_branch_unlikely(&_page_poisoning_enabled); +} +static inline void kernel_poison_pages(struct page *page, int numpages) +{ + if (page_poisoning_enabled_static()) + __kernel_poison_pages(page, numpages); +} +static inline void kernel_unpoison_pages(struct page *page, int numpages) +{ + if (page_poisoning_enabled_static()) + __kernel_unpoison_pages(page, numpages); +} #else static inline bool page_poisoning_enabled(void) { return false; } -static inline void kernel_poison_pages(struct page *page, int numpages, - int enable) { } +static inline bool page_poisoning_enabled_static(void) { return false; } +static inline void kernel_poison_pages(struct page *page, int numpages) { } +static inline void kernel_unpoison_pages(struct page *page, int numpages) { } #endif DECLARE_STATIC_KEY_FALSE(init_on_alloc); -- cgit v1.2.3 From 03b6c9a3e8805606c0bb4ad41855fac3bf85c3b9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:38 -0800 Subject: kernel/power: allow hibernation with page_poison sanity checking Page poisoning used to be incompatible with hibernation, as the state of poisoned pages was lost after resume, thus enabling CONFIG_HIBERNATION forces CONFIG_PAGE_POISONING_NO_SANITY. For the same reason, the poisoning with zeroes variant CONFIG_PAGE_POISONING_ZERO used to disable hibernation. The latter restriction was removed by commit 1ad1410f632d ("PM / Hibernate: allow hibernation with PAGE_POISONING_ZERO") and similarly for init_on_free by commit 18451f9f9e58 ("PM: hibernate: fix crashes with init_on_free=1") by making sure free pages are cleared after resume. We can use the same mechanism to instead poison free pages with PAGE_POISON after resume. This covers both zero and 0xAA patterns. Thus we can remove the Kconfig restriction that disables page poison sanity checking when hibernation is enabled. Link: https://lkml.kernel.org/r/20201113104033.22907-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Rafael J. Wysocki [hibernation] Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 026707a58159..3e1fe8ca9720 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2903,6 +2903,7 @@ static inline void kernel_unpoison_pages(struct page *page, int numpages) #else static inline bool page_poisoning_enabled(void) { return false; } static inline bool page_poisoning_enabled_static(void) { return false; } +static inline void __kernel_poison_pages(struct page *page, int nunmpages) { } static inline void kernel_poison_pages(struct page *page, int numpages) { } static inline void kernel_unpoison_pages(struct page *page, int numpages) { } #endif -- cgit v1.2.3 From f289041ed4cf9a3f6e8a32068fef9ffb2acc5662 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:45 -0800 Subject: mm, page_poison: remove CONFIG_PAGE_POISONING_ZERO CONFIG_PAGE_POISONING_ZERO uses the zero pattern instead of 0xAA. It was introduced by commit 1414c7f4f7d7 ("mm/page_poisoning.c: allow for zero poisoning"), noting that using zeroes retains the benefit of sanitizing content of freed pages, with the benefit of not having to zero them again on alloc, and the downside of making some forms of corruption (stray writes of NULLs) harder to detect than with the 0xAA pattern. Together with CONFIG_PAGE_POISONING_NO_SANITY it made possible to sanitize the contents on free without checking it back on alloc. These days we have the init_on_free() option to achieve sanitization with zeroes and to save clearing on alloc (and without checking on alloc). Arguably if someone does choose to check the poison for corruption on alloc, the savings of not clearing the page are secondary, and it makes sense to always use the 0xAA poison pattern. Thus, remove the CONFIG_PAGE_POISONING_ZERO option for being redundant. Link: https://lkml.kernel.org/r/20201113104033.22907-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poison.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index dc8ae5d8db03..aff1c9250c82 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -27,11 +27,7 @@ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/page_poison.c **********/ -#ifdef CONFIG_PAGE_POISONING_ZERO -#define PAGE_POISON 0x00 -#else #define PAGE_POISON 0xaa -#endif /********** mm/page_alloc.c ************/ -- cgit v1.2.3 From 88dcb9a3fb48c67ec345f1cdbc2a26119d3cb57d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:33:20 -0800 Subject: mm/thp: move lru_add_page_tail() to huge_memory.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "per memcg lru lock", v21. This patchset includes 3 parts: 1) some code cleanup and minimum optimization as preparation 2) use TestCleanPageLRU as page isolation's precondition 3) replace per node lru_lock with per memcg per node lru_lock Current lru_lock is one for each of node, pgdat->lru_lock, that guard for lru lists, but now we had moved the lru lists into memcg for long time. Still using per node lru_lock is clearly unscalable, pages on each of memcgs have to compete each others for a whole lru_lock. This patchset try to use per lruvec/memcg lru_lock to repleace per node lru lock to guard lru lists, make it scalable for memcgs and get performance gain. Currently lru_lock still guards both lru list and page's lru bit, that's ok. but if we want to use specific lruvec lock on the page, we need to pin down the page's lruvec/memcg during locking. Just taking lruvec lock first may be undermined by the page's memcg charge/migration. To fix this problem, we could take out the page's lru bit clear and use it as pin down action to block the memcg changes. That's the reason for new atomic func TestClearPageLRU. So now isolating a page need both actions: TestClearPageLRU and hold the lru_lock. The typical usage of this is isolate_migratepages_block() in compaction.c we have to take lru bit before lru lock, that serialized the page isolation in memcg page charge/migration which will change page's lruvec and new lru_lock in it. The above solution suggested by Johannes Weiner, and based on his new memcg charge path, then have this patchset. (Hugh Dickins tested and contributed much code from compaction fix to general code polish, thanks a lot!). Daniel Jordan's testing show 62% improvement on modified readtwice case on his 2P * 10 core * 2 HT broadwell box on v18, which has no much different with this v20. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Thanks to Hugh Dickins and Konstantin Khlebnikov, they both brought this idea 8 years ago, and others who gave comments as well: Daniel Jordan, Mel Gorman, Shakeel Butt, Matthew Wilcox, Alexander Duyck etc. Thanks for Testing support from Intel 0day and Rong Chen, Fengguang Wu, and Yun Wang. Hugh Dickins also shared his kbuild-swap case. This patch (of 19): lru_add_page_tail() is only used in huge_memory.c, defining it in other file with a CONFIG_TRANSPARENT_HUGEPAGE macro restrict just looks weird. Let's move it THP. And make it static as Hugh Dickins suggested. Link: https://lkml.kernel.org/r/1604566549-62481-1-git-send-email-alex.shi@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-2-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Reviewed-by: Kirill A. Shutemov Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Matthew Wilcox Cc: Mel Gorman Cc: Tejun Heo Cc: Konstantin Khlebnikov Cc: Daniel Jordan Cc: Shakeel Butt Cc: Joonsoo Kim Cc: Wei Yang Cc: Alexander Duyck Cc: "Chen, Rong A" Cc: Michal Hocko Cc: Vladimir Davydov Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Huang, Ying" Cc: Jann Horn Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 667935c0dbd4..5e1e967c225f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -338,8 +338,6 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); -extern void lru_add_page_tail(struct page *page, struct page *page_tail, - struct lruvec *lruvec, struct list_head *head); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); -- cgit v1.2.3 From d25b5bd8a8f420b15517c19c4626c0c009f72a63 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:16 -0800 Subject: mm/lru: introduce TestClearPageLRU() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently lru_lock still guards both lru list and page's lru bit, that's ok. but if we want to use specific lruvec lock on the page, we need to pin down the page's lruvec/memcg during locking. Just taking lruvec lock first may be undermined by the page's memcg charge/migration. To fix this problem, we will clear the lru bit out of locking and use it as pin down action to block the page isolation in memcg changing. So now a standard steps of page isolation is following: 1, get_page(); #pin the page avoid to be free 2, TestClearPageLRU(); #block other isolation like memcg change 3, spin_lock on lru_lock; #serialize lru list access 4, delete page from lru list; This patch start with the first part: TestClearPageLRU, which combines PageLRU check and ClearPageLRU into a macro func TestClearPageLRU. This function will be used as page isolation precondition to prevent other isolations some where else. Then there are may !PageLRU page on lru list, need to remove BUG() checking accordingly. There 2 rules for lru bit now: 1, the lru bit still indicate if a page on lru list, just in some temporary moment(isolating), the page may have no lru bit when it's on lru list. but the page still must be on lru list when the lru bit set. 2, have to remove lru bit before delete it from lru list. As Andrew Morton mentioned this change would dirty cacheline for a page which isn't on the LRU. But the loss would be acceptable in Rong Chen report: https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/ Link: https://lkml.kernel.org/r/1604566549-62481-15-git-send-email-alex.shi@linux.alibaba.com Suggested-by: Johannes Weiner Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Vladimir Davydov Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c1368af622c7..f8b4375ce1b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -334,6 +334,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) + TESTCLEARFLAG(LRU, lru, PF_HEAD) PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) TESTCLEARFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) -- cgit v1.2.3 From 9df41314390b81a541ca6e84c8340bad0959e4b5 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:20 -0800 Subject: mm/compaction: do page isolation first in compaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, compaction would get the lru_lock and then do page isolation which works fine with pgdat->lru_lock, since any page isoltion would compete for the lru_lock. If we want to change to memcg lru_lock, we have to isolate the page before getting lru_lock, thus isoltion would block page's memcg change which relay on page isoltion too. Then we could safely use per memcg lru_lock later. The new page isolation use previous introduced TestClearPageLRU() + pgdat lru locking which will be changed to memcg lru lock later. Hugh Dickins fixed following bugs in this patch's early version: Fix lots of crashes under compaction load: isolate_migratepages_block() must clean up appropriately when rejecting a page, setting PageLRU again if it had been cleared; and a put_page() after get_page_unless_zero() cannot safely be done while holding locked_lruvec - it may turn out to be the final put_page(), which will take an lruvec lock when PageLRU. And move __isolate_lru_page_prepare back after get_page_unless_zero to make trylock_page() safe: trylock_page() is not safe to use at this time: its setting PG_locked can race with the page being freed or allocated ("Bad page"), and can also erase flags being set by one of those "sole owners" of a freshly allocated page who use non-atomic __SetPageFlag(). Link: https://lkml.kernel.org/r/1604566549-62481-16-git-send-email-alex.shi@linux.alibaba.com Suggested-by: Johannes Weiner Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Matthew Wilcox Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Chen, Rong A" Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e1e967c225f..596bc2f4d9b0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -356,7 +356,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); +extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, -- cgit v1.2.3 From 6168d0da2b479ce25a4647de194045de1bdd1f1d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:29 -0800 Subject: mm/lru: replace pgdat lru_lock with lruvec lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves per node lru_lock into lruvec, thus bring a lru_lock for each of memcg per node. So on a large machine, each of memcg don't have to suffer from per node pgdat->lru_lock competition. They could go fast with their self lru_lock. After move memcg charge before lru inserting, page isolation could serialize page's memcg, then per memcg lruvec lock is stable and could replace per node lru lock. In isolate_migratepages_block(), compact_unlock_should_abort and lock_page_lruvec_irqsave are open coded to work with compact_control. Also add a debug func in locking which may give some clues if there are sth out of hands. Daniel Jordan's testing show 62% improvement on modified readtwice case on his 2P * 10 core * 2 HT broadwell box. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Hugh Dickins helped on the patch polish, thanks! [alex.shi@linux.alibaba.com: fix comment typo] Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Rong Chen Cc: Michal Hocko Cc: Vladimir Davydov Cc: Yang Shi Cc: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Daniel Jordan Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmzone.h | 3 ++- 2 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f530d634f055..aa5d559853c2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -491,6 +491,19 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); struct mem_cgroup *get_mem_cgroup_from_page(struct page *page); +struct lruvec *lock_page_lruvec(struct page *page); +struct lruvec *lock_page_lruvec_irq(struct page *page); +struct lruvec *lock_page_lruvec_irqsave(struct page *page, + unsigned long *flags); + +#ifdef CONFIG_DEBUG_VM +void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); +#else +static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +{ +} +#endif + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? container_of(css, struct mem_cgroup, css) : NULL; @@ -996,6 +1009,31 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } +static inline struct lruvec *lock_page_lruvec(struct page *page) +{ + struct pglist_data *pgdat = page_pgdat(page); + + spin_lock(&pgdat->__lruvec.lru_lock); + return &pgdat->__lruvec; +} + +static inline struct lruvec *lock_page_lruvec_irq(struct page *page) +{ + struct pglist_data *pgdat = page_pgdat(page); + + spin_lock_irq(&pgdat->__lruvec.lru_lock); + return &pgdat->__lruvec; +} + +static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, + unsigned long *flagsp) +{ + struct pglist_data *pgdat = page_pgdat(page); + + spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); + return &pgdat->__lruvec; +} + static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -1215,6 +1253,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } + +static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +{ +} #endif /* CONFIG_MEMCG */ /* idx can be of type enum memcg_stat_item or node_stat_item */ @@ -1296,6 +1338,22 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec)); } +static inline void unlock_page_lruvec(struct lruvec *lruvec) +{ + spin_unlock(&lruvec->lru_lock); +} + +static inline void unlock_page_lruvec_irq(struct lruvec *lruvec) +{ + spin_unlock_irq(&lruvec->lru_lock); +} + +static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, + unsigned long flags) +{ + spin_unlock_irqrestore(&lruvec->lru_lock, flags); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 98a80c01d150..9da23c019dc5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -276,6 +276,8 @@ enum lruvec_flags { struct lruvec { struct list_head lists[NR_LRU_LISTS]; + /* per lruvec lru_lock for memcg */ + spinlock_t lru_lock; /* * These track the cost of reclaiming one LRU - file or anon - * over the other. As the observed cost of reclaiming one LRU @@ -782,7 +784,6 @@ typedef struct pglist_data { /* Write-intensive fields used by page reclaim */ ZONE_PADDING(_pad1_) - spinlock_t lru_lock; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* -- cgit v1.2.3 From 2a5e4e340b0fe0f8d402196a466887db6a270b9b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Dec 2020 12:34:33 -0800 Subject: mm/lru: introduce relock_page_lruvec() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add relock_page_lruvec() to replace repeated same code, no functional change. When testing for relock we can avoid the need for RCU locking if we simply compare the page pgdat and memcg pointers versus those that the lruvec is holding. By doing this we can avoid the extra pointer walks and accesses of the memory cgroup. In addition we can avoid the checks entirely if lruvec is currently NULL. [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/66d8e79d-7ec6-bfbc-1c82-bf32db3ae5b7@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-19-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alexander Duyck Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Matthew Wilcox Cc: Mel Gorman Cc: Konstantin Khlebnikov Cc: Tejun Heo Cc: Andrea Arcangeli Cc: "Chen, Rong A" Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Vladimir Davydov Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index aa5d559853c2..ff02f831e7e1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -485,6 +485,22 @@ out: struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); +static inline bool lruvec_holds_page_lru_lock(struct page *page, + struct lruvec *lruvec) +{ + pg_data_t *pgdat = page_pgdat(page); + const struct mem_cgroup *memcg; + struct mem_cgroup_per_node *mz; + + if (mem_cgroup_disabled()) + return lruvec == &pgdat->__lruvec; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + memcg = page_memcg(page) ? : root_mem_cgroup; + + return lruvec->pgdat == pgdat && mz->memcg == memcg; +} + struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); @@ -984,6 +1000,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, return &pgdat->__lruvec; } +static inline bool lruvec_holds_page_lru_lock(struct page *page, + struct lruvec *lruvec) +{ + pg_data_t *pgdat = page_pgdat(page); + + return lruvec == &pgdat->__lruvec; +} + static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { return NULL; @@ -1354,6 +1378,34 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, spin_unlock_irqrestore(&lruvec->lru_lock, flags); } +/* Don't lock again iff page's lruvec locked */ +static inline struct lruvec *relock_page_lruvec_irq(struct page *page, + struct lruvec *locked_lruvec) +{ + if (locked_lruvec) { + if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + return locked_lruvec; + + unlock_page_lruvec_irq(locked_lruvec); + } + + return lock_page_lruvec_irq(page); +} + +/* Don't lock again iff page's lruvec locked */ +static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, + struct lruvec *locked_lruvec, unsigned long *flags) +{ + if (locked_lruvec) { + if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + return locked_lruvec; + + unlock_page_lruvec_irqrestore(locked_lruvec, *flags); + } + + return lock_page_lruvec_irqsave(page, flags); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); -- cgit v1.2.3 From 15b447361794271f4d03c04d82276a841fe06328 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 15 Dec 2020 14:21:31 -0800 Subject: mm/lru: revise the comments of lru_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we changed the pgdat->lru_lock to lruvec->lru_lock, it's time to fix the incorrect comments in code. Also fixed some zone->lru_lock comment error from ancient time. etc. I struggled to understand the comment above move_pages_to_lru() (surely it never calls page_referenced()), and eventually realized that most of it had got separated from shrink_active_list(): move that comment back. Link: https://lkml.kernel.org/r/1604566549-62481-20-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Hugh Dickins Signed-off-by: Alex Shi Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Tejun Heo Cc: Andrey Ryabinin Cc: Jann Horn Cc: Mel Gorman Cc: Matthew Wilcox Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: "Chen, Rong A" Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- include/linux/mmzone.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 915f4f100383..a9688cc55964 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -79,7 +79,7 @@ struct page { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by - * pgdat->lru_lock. Sometimes used as a generic list + * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ struct list_head lru; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9da23c019dc5..b593316bff3d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -113,8 +113,7 @@ static inline bool free_area_empty(struct free_area *area, int migratetype) struct pglist_data; /* - * zone->lock and the zone lru_lock are two of the hottest locks in the kernel. - * So add a wild amount of padding here to ensure that they fall into separate + * Add a wild amount of padding here to ensure datas fall into separate * cachelines. There are very few zone structures in the machine, so space * consumption is not a concern here. */ -- cgit v1.2.3 From c6c75deda81344c3a95d1d1f606d5cee109e5d54 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2020 20:42:39 -0800 Subject: proc: fix lookup in /proc/net subdirectories after setns(2) Commit 1fde6f21d90f ("proc: fix /proc/net/* after setns(2)") only forced revalidation of regular files under /proc/net/ However, /proc/net/ is unusual in the sense of /proc/net/foo handlers take netns pointer from parent directory which is old netns. Steps to reproduce: (void)open("/proc/net/sctp/snmp", O_RDONLY); unshare(CLONE_NEWNET); int fd = open("/proc/net/sctp/snmp", O_RDONLY); read(fd, &c, 1); Read will read wrong data from original netns. Patch forces lookup on every directory under /proc/net . Link: https://lkml.kernel.org/r/20201205160916.GA109739@localhost.localdomain Fixes: 1da4d377f943 ("proc: revalidate misc dentries") Signed-off-by: Alexey Dobriyan Reported-by: "Rantala, Tommi T. (Nokia - FI/Espoo)" Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 270cab43ca3d..000cc0533c33 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -80,6 +80,7 @@ extern void proc_flush_pid(struct pid *); extern struct proc_dir_entry *proc_symlink(const char *, struct proc_dir_entry *, const char *); +struct proc_dir_entry *_proc_mkdir(const char *, umode_t, struct proc_dir_entry *, void *, bool); extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry *); extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t, struct proc_dir_entry *, void *); @@ -162,6 +163,11 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} static inline struct proc_dir_entry *proc_create_mount_point(const char *name) { return NULL; } +static inline struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode, + struct proc_dir_entry *parent, void *data, bool force_lookup) +{ + return NULL; +} static inline struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; } static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, @@ -199,7 +205,7 @@ struct net; static inline struct proc_dir_entry *proc_net_mkdir( struct net *net, const char *name, struct proc_dir_entry *parent) { - return proc_mkdir_data(name, 0, parent, net); + return _proc_mkdir(name, 0, parent, net, true); } struct ns_common; -- cgit v1.2.3 From aa6159ab99a9ab5df835b4750b66cf132a5aa292 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 Dec 2020 20:42:48 -0800 Subject: kernel.h: split out mathematical helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out mathematical helpers. At the same time convert users in header and lib folder to use new header. Though for time being include new header back to kernel.h to avoid twisted indirected includes for existing users. [sfr@canb.auug.org.au: fix powerpc build] Link: https://lkml.kernel.org/r/20201029150809.13059608@canb.auug.org.au Link: https://lkml.kernel.org/r/20201028173212.41768-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: "Paul E. McKenney" Cc: Trond Myklebust Cc: Jeff Layton Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 ++- include/linux/dcache.h | 1 + include/linux/iommu-helper.h | 4 +- include/linux/kernel.h | 173 +---------------------------------------- include/linux/math.h | 177 ++++++++++++++++++++++++++++++++++++++++++ include/linux/rcu_node_tree.h | 2 + include/linux/units.h | 2 +- 7 files changed, 194 insertions(+), 176 deletions(-) create mode 100644 include/linux/math.h (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5b74bdf159d6..a61f192c096b 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -1,9 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H + #include #include +#include + /* Set bits in the first 'n' bytes when loaded from memory */ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1UL << 8*(n))-1) @@ -12,10 +15,10 @@ #endif #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) -#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) -#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) +#define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) +#define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) +#define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) +#define BITS_TO_BYTES(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6f95c3300cbb..d7b369fc15d3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 70d01edcbf8b..74be34f3a20a 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -3,7 +3,9 @@ #define _LINUX_IOMMU_HELPER_H #include -#include +#include +#include +#include static inline unsigned long iommu_device_max_index(unsigned long size, unsigned long offset, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index dbf6018fc312..f7902d8c1048 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -2,7 +2,6 @@ #ifndef _LINUX_KERNEL_H #define _LINUX_KERNEL_H - #include #include #include @@ -11,12 +10,14 @@ #include #include #include +#include #include #include #include #include + #include -#include + #include #define STACK_MAGIC 0xdeadbeef @@ -54,125 +55,11 @@ } \ ) -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -/** - * round_up - round up to next specified power of 2 - * @x: the value to round - * @y: multiple to round up to (must be a power of 2) - * - * Rounds @x up to next multiple of @y (which must be a power of 2). - * To perform arbitrary rounding up, use roundup() below. - */ -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -/** - * round_down - round down to next specified power of 2 - * @x: the value to round - * @y: multiple to round down to (must be a power of 2) - * - * Rounds @x down to next multiple of @y (which must be a power of 2). - * To perform arbitrary rounding down, use rounddown() below. - */ -#define round_down(x, y) ((x) & ~__round_mask(x, y)) - #define typeof_member(T, m) typeof(((T*)0)->m) -#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP - -#define DIV_ROUND_DOWN_ULL(ll, d) \ - ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) - -#define DIV_ROUND_UP_ULL(ll, d) \ - DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) - -#if BITS_PER_LONG == 32 -# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) -#else -# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) -#endif - -/** - * roundup - round up to the next specified multiple - * @x: the value to up - * @y: multiple to round up to - * - * Rounds @x up to next multiple of @y. If @y will always be a power - * of 2, consider using the faster round_up(). - */ -#define roundup(x, y) ( \ -{ \ - typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -/** - * rounddown - round down to next specified multiple - * @x: the value to round - * @y: multiple to round down to - * - * Rounds @x down to next multiple of @y. If @y will always be a power - * of 2, consider using the faster round_down(). - */ -#define rounddown(x, y) ( \ -{ \ - typeof(x) __x = (x); \ - __x - (__x % (y)); \ -} \ -) - -/* - * Divide positive or negative dividend by positive or negative divisor - * and round to closest integer. Result is undefined for negative - * divisors if the dividend variable type is unsigned and for negative - * dividends if the divisor variable type is unsigned. - */ -#define DIV_ROUND_CLOSEST(x, divisor)( \ -{ \ - typeof(x) __x = x; \ - typeof(divisor) __d = divisor; \ - (((typeof(x))-1) > 0 || \ - ((typeof(divisor))-1) > 0 || \ - (((__x) > 0) == ((__d) > 0))) ? \ - (((__x) + ((__d) / 2)) / (__d)) : \ - (((__x) - ((__d) / 2)) / (__d)); \ -} \ -) -/* - * Same as above but for u64 dividends. divisor must be a 32-bit - * number. - */ -#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ -{ \ - typeof(divisor) __d = divisor; \ - unsigned long long _tmp = (x) + (__d) / 2; \ - do_div(_tmp, __d); \ - _tmp; \ -} \ -) - -/* - * Multiplies an integer by a fraction, while avoiding unnecessary - * overflow or loss of precision. - */ -#define mult_frac(x, numer, denom)( \ -{ \ - typeof(x) quot = (x) / (denom); \ - typeof(x) rem = (x) % (denom); \ - (quot * (numer)) + ((rem * (numer)) / (denom)); \ -} \ -) - - #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) -#define sector_div(a, b) do_div(a, b) - /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing @@ -272,48 +159,6 @@ extern void __cant_migrate(const char *file, int line); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -/** - * abs - return absolute value of an argument - * @x: the value. If it is unsigned type, it is converted to signed type first. - * char is treated as if it was signed (regardless of whether it really is) - * but the macro's return type is preserved as char. - * - * Return: an absolute value of x. - */ -#define abs(x) __abs_choose_expr(x, long long, \ - __abs_choose_expr(x, long, \ - __abs_choose_expr(x, int, \ - __abs_choose_expr(x, short, \ - __abs_choose_expr(x, char, \ - __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(x), char), \ - (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ - ((void)0))))))) - -#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(x), signed type) || \ - __builtin_types_compatible_p(typeof(x), unsigned type), \ - ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) - -/** - * reciprocal_scale - "scale" a value into range [0, ep_ro) - * @val: value - * @ep_ro: right open interval endpoint - * - * Perform a "reciprocal multiplication" in order to "scale" a value into - * range [0, @ep_ro), where the upper interval endpoint is right-open. - * This is useful, e.g. for accessing a index of an array containing - * @ep_ro elements, for example. Think of it as sort of modulus, only that - * the result isn't that of modulo. ;) Note that if initial input is a - * small value, then result will return 0. - * - * Return: a result based on @val in interval [0, @ep_ro). - */ -static inline u32 reciprocal_scale(u32 val, u32 ep_ro) -{ - return (u32)(((u64) val * ep_ro) >> 32); -} - #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) #define might_fault() __might_fault(__FILE__, __LINE__) @@ -515,18 +360,6 @@ extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -u64 int_pow(u64 base, unsigned int exp); -unsigned long int_sqrt(unsigned long); - -#if BITS_PER_LONG < 64 -u32 int_sqrt64(u64 x); -#else -static inline u32 int_sqrt64(u64 x) -{ - return (u32)int_sqrt(x); -} -#endif - #ifdef CONFIG_SMP extern unsigned int sysctl_oops_all_cpu_backtrace; #else diff --git a/include/linux/math.h b/include/linux/math.h new file mode 100644 index 000000000000..53674a327e39 --- /dev/null +++ b/include/linux/math.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MATH_H +#define _LINUX_MATH_H + +#include +#include + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) + +/** + * round_up - round up to next specified power of 2 + * @x: the value to round + * @y: multiple to round up to (must be a power of 2) + * + * Rounds @x up to next multiple of @y (which must be a power of 2). + * To perform arbitrary rounding up, use roundup() below. + */ +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) + +/** + * round_down - round down to next specified power of 2 + * @x: the value to round + * @y: multiple to round down to (must be a power of 2) + * + * Rounds @x down to next multiple of @y (which must be a power of 2). + * To perform arbitrary rounding down, use rounddown() below. + */ +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP + +#define DIV_ROUND_DOWN_ULL(ll, d) \ + ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_UP_ULL(ll, d) \ + DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) + +#if BITS_PER_LONG == 32 +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) +#else +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) +#endif + +/** + * roundup - round up to the next specified multiple + * @x: the value to up + * @y: multiple to round up to + * + * Rounds @x up to next multiple of @y. If @y will always be a power + * of 2, consider using the faster round_up(). + */ +#define roundup(x, y) ( \ +{ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +/** + * rounddown - round down to next specified multiple + * @x: the value to round + * @y: multiple to round down to + * + * Rounds @x down to next multiple of @y. If @y will always be a power + * of 2, consider using the faster round_down(). + */ +#define rounddown(x, y) ( \ +{ \ + typeof(x) __x = (x); \ + __x - (__x % (y)); \ +} \ +) + +/* + * Divide positive or negative dividend by positive or negative divisor + * and round to closest integer. Result is undefined for negative + * divisors if the dividend variable type is unsigned and for negative + * dividends if the divisor variable type is unsigned. + */ +#define DIV_ROUND_CLOSEST(x, divisor)( \ +{ \ + typeof(x) __x = x; \ + typeof(divisor) __d = divisor; \ + (((typeof(x))-1) > 0 || \ + ((typeof(divisor))-1) > 0 || \ + (((__x) > 0) == ((__d) > 0))) ? \ + (((__x) + ((__d) / 2)) / (__d)) : \ + (((__x) - ((__d) / 2)) / (__d)); \ +} \ +) +/* + * Same as above but for u64 dividends. divisor must be a 32-bit + * number. + */ +#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ +{ \ + typeof(divisor) __d = divisor; \ + unsigned long long _tmp = (x) + (__d) / 2; \ + do_div(_tmp, __d); \ + _tmp; \ +} \ +) + +/* + * Multiplies an integer by a fraction, while avoiding unnecessary + * overflow or loss of precision. + */ +#define mult_frac(x, numer, denom)( \ +{ \ + typeof(x) quot = (x) / (denom); \ + typeof(x) rem = (x) % (denom); \ + (quot * (numer)) + ((rem * (numer)) / (denom)); \ +} \ +) + +#define sector_div(a, b) do_div(a, b) + +/** + * abs - return absolute value of an argument + * @x: the value. If it is unsigned type, it is converted to signed type first. + * char is treated as if it was signed (regardless of whether it really is) + * but the macro's return type is preserved as char. + * + * Return: an absolute value of x. + */ +#define abs(x) __abs_choose_expr(x, long long, \ + __abs_choose_expr(x, long, \ + __abs_choose_expr(x, int, \ + __abs_choose_expr(x, short, \ + __abs_choose_expr(x, char, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), char), \ + (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ + ((void)0))))))) + +#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), signed type) || \ + __builtin_types_compatible_p(typeof(x), unsigned type), \ + ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) + +/** + * reciprocal_scale - "scale" a value into range [0, ep_ro) + * @val: value + * @ep_ro: right open interval endpoint + * + * Perform a "reciprocal multiplication" in order to "scale" a value into + * range [0, @ep_ro), where the upper interval endpoint is right-open. + * This is useful, e.g. for accessing a index of an array containing + * @ep_ro elements, for example. Think of it as sort of modulus, only that + * the result isn't that of modulo. ;) Note that if initial input is a + * small value, then result will return 0. + * + * Return: a result based on @val in interval [0, @ep_ro). + */ +static inline u32 reciprocal_scale(u32 val, u32 ep_ro) +{ + return (u32)(((u64) val * ep_ro) >> 32); +} + +u64 int_pow(u64 base, unsigned int exp); +unsigned long int_sqrt(unsigned long); + +#if BITS_PER_LONG < 64 +u32 int_sqrt64(u64 x); +#else +static inline u32 int_sqrt64(u64 x) +{ + return (u32)int_sqrt(x); +} +#endif + +#endif /* _LINUX_MATH_H */ diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h index b8e094b125ee..78feb8ba7358 100644 --- a/include/linux/rcu_node_tree.h +++ b/include/linux/rcu_node_tree.h @@ -20,6 +20,8 @@ #ifndef __LINUX_RCU_NODE_TREE_H #define __LINUX_RCU_NODE_TREE_H +#include + /* * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and * CONFIG_RCU_FANOUT_LEAF. diff --git a/include/linux/units.h b/include/linux/units.h index aaf716364ec3..5c115c809507 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -2,7 +2,7 @@ #ifndef _LINUX_UNITS_H #define _LINUX_UNITS_H -#include +#include #define ABSOLUTE_ZERO_MILLICELSIUS -273150 -- cgit v1.2.3 From 0bb867795540a9223d44ddcdf478330cba5917f8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 Dec 2020 20:42:55 -0800 Subject: include/linux/bitmap.h: convert bitmap_empty() / bitmap_full() to return boolean There is no need to return int type out of boolean expression. Link: https://lkml.kernel.org/r/20201027180936.20806-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Yury Norov Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 99058eb81042..719fe036f567 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -379,7 +379,7 @@ static inline int bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline int bitmap_empty(const unsigned long *src, unsigned nbits) +static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -387,7 +387,7 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline int bitmap_full(const unsigned long *src, unsigned int nbits) +static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); -- cgit v1.2.3 From ab7d7798dad5aae23bb502f1a6fc0d637b07dc47 Mon Sep 17 00:00:00 2001 From: "Ma, Jianpeng" Date: Tue, 15 Dec 2020 20:42:57 -0800 Subject: bitmap: remove unused function declaration Link: https://lkml.kernel.org/r/BN7PR11MB26097166B6B46387D8A1ABA4FDE30@BN7PR11MB2609.namprd11.prod.outlook.com Fixes: 2afe27c718b6 ("lib/bitmap.c: bitmap_[empty,full]: remove code duplication") Signed-off-by: Jianpeng Ma Acked-by: Yury Norov Reviewed-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 719fe036f567..70a932470b2d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -126,8 +126,6 @@ extern void bitmap_free(const unsigned long *bitmap); * lib/bitmap.c provides these functions: */ -extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); -extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern bool __pure __bitmap_or_equal(const unsigned long *src1, -- cgit v1.2.3 From 2f78788b55baa3410b1ec91a576286abe1ad4d6a Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 15 Dec 2020 20:43:37 -0800 Subject: ilog2: improve ilog2 for constant arguments As discussed in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97445 the const_ilog2 macro generates a lot of code which interferes badly with GCC inlining heuristics, until it can be proven that the ilog2 argument can or can't be simplified into a constant. It can be expressed using __builtin_clzll builtin which is supported by GCC 3.4 and later and when used only in the __builtin_constant_p guarded code it ought to always fold back to a constant. Other compilers support the same builtin for many years too. Other option would be to change the const_ilog2 macro, though as the description says it is meant to be used also in C constant expressions, and while GCC will fold it to constant with constant argument even in those, perhaps it is better to avoid using extensions in that case. [akpm@linux-foundation.org: coding style fixes] Link: https://lkml.kernel.org/r/20201120125154.GB3040@hirez.programming.kicks-ass.net Link: https://lkml.kernel.org/r/20201021132718.GB2176@tucnak Signed-off-by: Jakub Jelinek Signed-off-by: Peter Zijlstra (Intel) Cc: Christophe Leroy Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/log2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index c619ec6eff4a..df0b155c2141 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -156,7 +156,8 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? \ - const_ilog2(n) : \ + ((n) < 2 ? 0 : \ + 63 - __builtin_clzll(n)) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ -- cgit v1.2.3 From 6a39e62abbafd1d58d1722f40c7d26ef379c6a2f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 15 Dec 2020 20:43:44 -0800 Subject: lib: string.h: detect intra-object overflow in fortified string functions Patch series "Fortify strscpy()", v7. This patch implements a fortified version of strscpy() enabled by setting CONFIG_FORTIFY_SOURCE=y. The new version ensures the following before calling vanilla strscpy(): 1. There is no read overflow because either size is smaller than src length or we shrink size to src length by calling fortified strnlen(). 2. There is no write overflow because we either failed during compilation or at runtime by checking that size is smaller than dest size. Note that, if src and dst size cannot be got, the patch defaults to call vanilla strscpy(). The patches adds the following: 1. Implement the fortified version of strscpy(). 2. Add a new LKDTM test to ensures the fortified version still returns the same value as the vanilla one while panic'ing when there is a write overflow. 3. Correct some typos in LKDTM related file. I based my modifications on top of two patches from Daniel Axtens which modify calls to __builtin_object_size, in fortified string functions, to ensure the true size of char * are returned and not the surrounding structure size. About performance, I measured the slow down of fortified strscpy(), using the vanilla one as baseline. The hardware I used is an Intel i3 2130 CPU clocked at 3.4 GHz. I ran "Linux 5.10.0-rc4+ SMP PREEMPT" inside qemu 3.10 with 4 CPU cores. The following code, called through LKDTM, was used as a benchmark: #define TIMES 10000 char *src; char dst[7]; int i; ktime_t begin; src = kstrdup("foobar", GFP_KERNEL); if (src == NULL) return; begin = ktime_get(); for (i = 0; i < TIMES; i++) strscpy(dst, src, strlen(src)); pr_info("%d fortified strscpy() tooks %lld", TIMES, ktime_get() - begin); begin = ktime_get(); for (i = 0; i < TIMES; i++) __real_strscpy(dst, src, strlen(src)); pr_info("%d vanilla strscpy() tooks %lld", TIMES, ktime_get() - begin); kfree(src); I called the above code 30 times to compute stats for each version (in ns, round to int): | version | mean | std | median | 95th | | --------- | ------- | ------ | ------- | ------- | | fortified | 245_069 | 54_657 | 216_230 | 331_122 | | vanilla | 172_501 | 70_281 | 143_539 | 219_553 | On average, fortified strscpy() is approximately 1.42 times slower than vanilla strscpy(). For the 95th percentile, the fortified version is about 1.50 times slower. So, clearly the stats are not in favor of fortified strscpy(). But, the fortified version loops the string twice (one in strnlen() and another in vanilla strscpy()) while the vanilla one only loops once. This can explain why fortified strscpy() is slower than the vanilla one. This patch (of 5): When the fortify feature was first introduced in commit 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions"), Daniel Micay observed: * It should be possible to optionally use __builtin_object_size(x, 1) for some functions (C strings) to detect intra-object overflows (like glibc's _FORTIFY_SOURCE=2), but for now this takes the conservative approach to avoid likely compatibility issues. This is a case that often cannot be caught by KASAN. Consider: struct foo { char a[10]; char b[10]; } void test() { char *msg; struct foo foo; msg = kmalloc(16, GFP_KERNEL); strcpy(msg, "Hello world!!"); // this copy overwrites foo.b strcpy(foo.a, msg); } The questionable copy overflows foo.a and writes to foo.b as well. It cannot be detected by KASAN. Currently it is also not detected by fortify, because strcpy considers __builtin_object_size(x, 0), which considers the size of the surrounding object (here, struct foo). However, if we switch the string functions over to use __builtin_object_size(x, 1), the compiler will measure the size of the closest surrounding subobject (here, foo.a), rather than the size of the surrounding object as a whole. See https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html for more info. Only do this for string functions: we cannot use it on things like memcpy, memmove, memcmp and memchr_inv due to code like this which purposefully operates on multiple structure members: (arch/x86/kernel/traps.c) /* * regs->sp points to the failing IRET frame on the * ESPFIX64 stack. Copy it to the entry stack. This fills * in gpregs->ss through gpregs->ip. * */ memmove(&gpregs->ip, (void *)regs->sp, 5*8); This change passes an allyesconfig on powerpc and x86, and an x86 kernel built with it survives running with syz-stress from syzkaller, so it seems safe so far. Link: https://lkml.kernel.org/r/20201122162451.27551-1-laniel_francis@privacyrequired.com Link: https://lkml.kernel.org/r/20201122162451.27551-2-laniel_francis@privacyrequired.com Signed-off-by: Daniel Axtens Signed-off-by: Francis Laniel Reviewed-by: Kees Cook Cc: Daniel Micay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index b1f3894a0a3e..46e91d684c47 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -292,7 +292,7 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); if (__builtin_constant_p(size) && p_size < size) __write_overflow(); if (p_size < size) @@ -302,7 +302,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) __FORTIFY_INLINE char *strcat(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); if (p_size == (size_t)-1) return __underlying_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) @@ -313,7 +313,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) __FORTIFY_INLINE __kernel_size_t strlen(const char *p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); /* Work around gcc excess stack consumption issue */ if (p_size == (size_t)-1 || @@ -328,7 +328,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) fortify_panic(__func__); @@ -340,8 +340,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) { size_t ret; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __real_strlcpy(p, q, size); ret = strlen(q); @@ -361,8 +361,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strncat(p, q, count); p_len = strlen(p); @@ -475,11 +475,16 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) /* defined after fortified strlen and memcpy to reuse them */ __FORTIFY_INLINE char *strcpy(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); + size_t size; if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strcpy(p, q); - memcpy(p, q, strlen(q) + 1); + size = strlen(q) + 1; + /* test here to use the more stringent object size */ + if (p_size < size) + fortify_panic(__func__); + memcpy(p, q, size); return p; } -- cgit v1.2.3 From 33e56a59e64dfb68778e5da0be13f0c47dc5d445 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Tue, 15 Dec 2020 20:43:50 -0800 Subject: string.h: add FORTIFY coverage for strscpy() The fortified version of strscpy ensures the following before vanilla strscpy is called: 1. There is no read overflow because we either size is smaller than src length or we shrink size to src length by calling fortified strnlen. 2. There is no write overflow because we either failed during compilation or at runtime by checking that size is smaller than dest size. Link: https://lkml.kernel.org/r/20201122162451.27551-4-laniel_francis@privacyrequired.com Signed-off-by: Francis Laniel Acked-by: Kees Cook Cc: Daniel Axtens Cc: Daniel Micay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 46e91d684c47..1cd63a8a23ab 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -6,6 +6,7 @@ #include /* for inline */ #include /* for size_t */ #include /* for NULL */ +#include /* for E2BIG */ #include #include @@ -357,6 +358,53 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) return ret; } +/* defined after fortified strnlen to reuse it */ +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); +__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size) +{ + size_t len; + /* Use string size rather than possible enclosing struct size. */ + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); + + /* If we cannot get size of p and q default to call strscpy. */ + if (p_size == (size_t) -1 && q_size == (size_t) -1) + return __real_strscpy(p, q, size); + + /* + * If size can be known at compile time and is greater than + * p_size, generate a compile time write overflow error. + */ + if (__builtin_constant_p(size) && size > p_size) + __write_overflow(); + + /* + * This call protects from read overflow, because len will default to q + * length if it smaller than size. + */ + len = strnlen(q, size); + /* + * If len equals size, we will copy only size bytes which leads to + * -E2BIG being returned. + * Otherwise we will copy len + 1 because of the final '\O'. + */ + len = len == size ? size : len + 1; + + /* + * Generate a runtime write overflow error if len is greater than + * p_size. + */ + if (len > p_size) + fortify_panic(__func__); + + /* + * We can now safely call vanilla strscpy because we are protected from: + * 1. Read overflow thanks to call to strnlen(). + * 2. Write overflow thanks to above ifs. + */ + return __real_strscpy(p, q, len); +} + /* defined after fortified strlen and strnlen to reuse them */ __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) { -- cgit v1.2.3 From 5c7b3280d221b84a675b85cb2727df7d82b65c3a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Dec 2020 20:45:34 -0800 Subject: rapidio: remove unused rio_get_asm() and rio_get_device() The functions rio_get_asm() and rio_get_device() are globally exported but have almost no users in tree. The only user is rio_init_mports() which invokes it via rio_init(). rio_init() iterates over every registered device and invokes rio_fixup_device(). It looks like a fixup function which should perform a "change" to the device but does nothing. It has been like this since its introduction in commit 394b701ce4fbf ("[PATCH] RapidIO support: core base") which was merged into v2.6.15-rc1. Remove rio_init() because the performed fixup function (rio_fixup_device()) does nothing. Remove rio_get_asm() and rio_get_device() which have no callers now. Link: https://lkml.kernel.org/r/20201116170004.420143-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio_drv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index d637742e5039..e49c32b0f394 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -444,9 +444,6 @@ static inline void rio_set_drvdata(struct rio_dev *rdev, void *data) /* Misc driver helpers */ extern u16 rio_local_get_device_id(struct rio_mport *port); extern void rio_local_set_device_id(struct rio_mport *port, u16 did); -extern struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from); -extern struct rio_dev *rio_get_asm(u16 vid, u16 did, u16 asm_vid, u16 asm_did, - struct rio_dev *from); extern int rio_init_mports(void); #endif /* LINUX_RIO_DRV_H */ -- cgit v1.2.3 From 3d03295a7e9194c2318977b44999972ce3609664 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2020 20:45:47 -0800 Subject: relay: remove unused buf_mapped and buf_unmapped callbacks Patch series "relay: cleanup and const callbacks", v2. None of the relay users require the use of mutable structs for callbacks, however the relay code does. Instead of assigning default callbacks when there is none, add callback wrappers to conditionally call the client callbacks if available, and fall back to default behaviour (typically no-op) otherwise. This lets all relay users make their struct rchan_callbacks const data. This series starts with a number of cleanups first based on Christoph's feedback. This patch (of 9): No relay client uses the buf_mapped or buf_unmapped callbacks. Remove them. This makes relay's vm_operations_struct close callback a dummy, remove it as well. Link: https://lkml.kernel.org/r/cover.1606153547.git.jani.nikula@intel.com Link: https://lkml.kernel.org/r/c69fff6e0cd485563604240bbfcc028434983bec.1606153547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Cc: Jens Axboe Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index e13a333e7c37..b3c4f49f6951 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -101,25 +101,6 @@ struct rchan_callbacks void *prev_subbuf, size_t prev_padding); - /* - * buf_mapped - relay buffer mmap notification - * @buf: the channel buffer - * @filp: relay file pointer - * - * Called when a relay file is successfully mmapped - */ - void (*buf_mapped)(struct rchan_buf *buf, - struct file *filp); - - /* - * buf_unmapped - relay buffer unmap notification - * @buf: the channel buffer - * @filp: relay file pointer - * - * Called when a relay file is successfully unmapped - */ - void (*buf_unmapped)(struct rchan_buf *buf, - struct file *filp); /* * create_buf_file - create file to represent a relay channel buffer * @filename: the name of the file to create -- cgit v1.2.3 From 371e03880d9d34534d3eafd2a7581042be598e39 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2020 20:45:53 -0800 Subject: relay: make create_buf_file and remove_buf_file callbacks mandatory All clients provide create_buf_file and remove_buf_file callbacks, and they're required for relay to make sense. There is no point in them being optional. Also document whether each callback is mandatory/optional. Link: https://lkml.kernel.org/r/88003c1527386b93036e286e7917f1e33aec84ac.1606153547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Cc: Jens Axboe Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index b3c4f49f6951..99d024475ba5 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -89,6 +89,8 @@ struct rchan_callbacks * The client should return 1 to continue logging, 0 to stop * logging. * + * This callback is optional. + * * NOTE: subbuf_start will also be invoked when the buffer is * created, so that the first sub-buffer can be initialized * if necessary. In this case, prev_subbuf will be NULL. @@ -122,6 +124,8 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * + * This callback is mandatory. + * * See Documentation/filesystems/relay.rst for more info. */ struct dentry *(*create_buf_file)(const char *filename, @@ -139,6 +143,8 @@ struct rchan_callbacks * channel buffer. * * The callback should return 0 if successful, negative if not. + * + * This callback is mandatory. */ int (*remove_buf_file)(struct dentry *dentry); }; -- cgit v1.2.3 From 023542f48b57d6b785fcadb86ac336ae80653e58 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2020 20:45:57 -0800 Subject: relay: allow the use of const callback structs None of the relay users require the use of mutable structs for callbacks, however the relay code does. Instead of assigning the default callback for subbuf_start, add a wrapper to conditionally call the client callback if available, and fall back to default behaviour otherwise. This lets all relay users make their struct rchan_callbacks const data. [jani.nikula@intel.com: cleanups, per Christoph] Link: https://lkml.kernel.org/r/20201124115412.32402-1-jani.nikula@intel.com Link: https://lkml.kernel.org/r/cc3ff292e4eb4fdc56bee3d690c7b8e39209cd37.1606153547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Christoph Hellwig Cc: Jens Axboe Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index 99d024475ba5..72b876dd5cb8 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -62,7 +62,7 @@ struct rchan size_t subbuf_size; /* sub-buffer size */ size_t n_subbufs; /* number of sub-buffers per buffer */ size_t alloc_size; /* total buffer size allocated */ - struct rchan_callbacks *cb; /* client callbacks */ + const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ @@ -157,7 +157,7 @@ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, - struct rchan_callbacks *cb, + const struct rchan_callbacks *cb, void *private_data); extern int relay_late_setup_files(struct rchan *chan, const char *base_filename, -- cgit v1.2.3 From ff5c19ed4b087073cea38ff0edc80c23d7256943 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Dec 2020 20:47:23 -0800 Subject: mm: simplify follow_pte{,pmd} Merge __follow_pte_pmd, follow_pte_pmd and follow_pte into a single follow_pte function and just pass two additional NULL arguments for the two previous follow_pte callers. [sfr@canb.auug.org.au: merge fix for "s390/pci: remove races against pte updates"] Link: https://lkml.kernel.org/r/20201111221254.7f6a3658@canb.auug.org.au Link: https://lkml.kernel.org/r/20201029101432.47011-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Matthew Wilcox (Oracle) Cc: Daniel Vetter Cc: Dan Williams Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index abc7b3154298..855161080f18 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1641,9 +1641,9 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte_pmd(struct mm_struct *mm, unsigned long address, - struct mmu_notifier_range *range, - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); +int follow_pte(struct mm_struct *mm, unsigned long address, + struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, + spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); int follow_phys(struct vm_area_struct *vma, unsigned long address, -- cgit v1.2.3 From c18e68696fdd9fd293f051030bce5aaff3c9b185 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Mon, 14 Dec 2020 21:15:47 -0800 Subject: net/connector: Add const qualifier to cb_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The connector driver never modifies any cb_id passed to it, so add a const qualifier to those arguments so callers can declare their struct cb_id as a constant object. Fixes build warnings like these when passing a constant struct cb_id: warning: passing argument 1 of ‘cn_add_callback’ discards ‘const’ qualifier from pointer target Signed-off-by: Geoff Levand Link: https://lore.kernel.org/r/a9e49c9e-67fa-16e7-0a6b-72f6bd30c58a@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/connector.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index cb732643471b..8ea860efea37 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -64,14 +64,14 @@ struct cn_dev { * @callback: connector's callback. * parameters are %cn_msg and the sender's credentials */ -int cn_add_callback(struct cb_id *id, const char *name, +int cn_add_callback(const struct cb_id *id, const char *name, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)); /** * cn_del_callback() - Unregisters new callback with connector core. * * @id: unique connector's user identifier. */ -void cn_del_callback(struct cb_id *id); +void cn_del_callback(const struct cb_id *id); /** @@ -122,14 +122,14 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 group, gfp_t gfp_mask); int cn_queue_add_callback(struct cn_queue_dev *dev, const char *name, - struct cb_id *id, + const struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)); -void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); +void cn_queue_del_callback(struct cn_queue_dev *dev, const struct cb_id *id); void cn_queue_release_callback(struct cn_callback_entry *); struct cn_queue_dev *cn_queue_alloc_dev(const char *name, struct sock *); void cn_queue_free_dev(struct cn_queue_dev *dev); -int cn_cb_equal(struct cb_id *, struct cb_id *); +int cn_cb_equal(const struct cb_id *, const struct cb_id *); #endif /* __CONNECTOR_H */ -- cgit v1.2.3 From 7061eb8cfa902daa1ec71d23b5cddb8b4391e72b Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 14 Dec 2020 15:19:28 -0600 Subject: net: core: introduce __netdev_notify_peers There are some use cases for netdev_notify_peers in the context when rtnl lock is already held. Introduce lockless version of netdev_notify_peers call to save the extra code to call call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); After that, convert netdev_notify_peers to call the new helper. Suggested-by: Nathan Lynch Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bf167993c05..259be67644e3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4547,6 +4547,7 @@ void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); +void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ -- cgit v1.2.3 From 767143a18d6d743d4254de5cf55b1bd87bb2af18 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Dec 2020 22:37:50 -0800 Subject: phy: fix kdoc warning Kdoc does not like it when multiline comment follows the networking style of starting right on the first line: include/linux/phy.h:869: warning: Function parameter or member 'config_intr' not described in 'phy_driver' Link: https://lore.kernel.org/r/20201215063750.3120976-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 381a95732b6a..9effb511acde 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -743,7 +743,8 @@ struct phy_driver { /** @read_status: Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); - /** @config_intr: Enables or disables interrupts. + /** + * @config_intr: Enables or disables interrupts. * It should also clear any pending interrupts prior to enabling the * IRQs and after disabling them. */ -- cgit v1.2.3 From 3df23a316c4a5d1764b034c71c29d67a17d5299f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 5 Dec 2020 17:19:24 +0100 Subject: pwm: Remove unused function pwmchip_add_inversed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only defined with CONFIG_PWM unset and was introduced together with pwmchip_add_with_polarity() (which is only defined with CONFIG_PWM enabled). I guess the series that introduced pwmchip_add_with_polarity() had a different concept in earlier revisions and the !CONFIG_PWM part was just not updated accordingly. Given that there is no implementation for pwmchip_add_with_polarity() without CONFIG_PWM, just drop pwmchip_add_inversed() instead of renaming it to pwmchip_add_with_polarity(). Signed-off-by: Uwe Kleine-König Acked-by: Lee Jones Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index a13ff383fa1d..e4d84d4db293 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -473,11 +473,6 @@ static inline int pwmchip_add(struct pwm_chip *chip) return -EINVAL; } -static inline int pwmchip_add_inversed(struct pwm_chip *chip) -{ - return -EINVAL; -} - static inline int pwmchip_remove(struct pwm_chip *chip) { return -EINVAL; -- cgit v1.2.3 From 49e27134f6e9ebcd08c04a98ab7f0574b5a81a35 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 13 Dec 2020 14:06:41 +0200 Subject: net/mlx5: Fix compilation warning for 32-bit platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MLX5_GENERAL_OBJECT_TYPES types bitfield is 64-bit field. Defining an enum for such bit fields on 32-bit platform results in below warning. ./include/vdso/bits.h:7:26: warning: left shift count >= width of type [-Wshift-count-overflow] ^ ./include/linux/mlx5/mlx5_ifc.h:10716:46: note: in expansion of macro ‘BIT’ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT(0x20), ^~~ Use 32-bit friendly BIT_ULL macro. Fixes: 2a2970891647 ("net/mlx5: Add sample offload hardware bits and structures") Signed-off-by: Parav Pandit Reported-by: Stephen Rothwell Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20201213120641.216032-1-leon@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0d6e287d614f..8fbddec26eb8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10711,9 +10711,9 @@ struct mlx5_ifc_affiliated_event_header_bits { }; enum { - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT(0x13), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT(0x20), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20), }; enum { -- cgit v1.2.3 From 9bd23c31f392bda88618008f27fd52ee9e0fac38 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 20 Nov 2020 12:22:32 -0800 Subject: jbd2: add a helper to find out number of fast commit blocks Add a helper to read number of fast commit blocks from jbd2 superblock and also rename the JBD2_MIN_FC_BLKS to JBD2_DEFAULT_FAST_COMMIT_BLOCKS since this constant is just the default number of fast commit blocks to use in case number of fast commit blocks isn't set in jbd2 superblock. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201120202232.2240293-2-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d2a4860feb72..99d3cd051ac3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,7 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 -#define JBD2_MIN_FC_BLOCKS 256 +#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256 #ifdef __KERNEL__ @@ -1692,6 +1692,13 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) return journal->j_chksum_driver != NULL; } +static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) +{ + int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks); + + return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS; +} + /* * Return number of free blocks in the log. Must be called under j_state_lock. */ -- cgit v1.2.3 From 476c135e321716ad7a8a5d4a19a636e2dcc50526 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Nov 2020 08:39:59 +0200 Subject: vdpa: Add missing comment for virtqueue count Add missing comment for number of virtqueue. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20201112064005.349268-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30bc7a7223bb..0fefeb976877 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -42,6 +42,7 @@ struct vdpa_vq_state { * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @nvqs: maximum number of supported virtqueues */ struct vdpa_device { struct device dev; -- cgit v1.2.3 From a4055888629bc0467d12d912cd7c90acdf3d9b12 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 18 Dec 2020 14:01:31 -0800 Subject: mm/memcg: warning on !memcg after readahead page charged Add VM_WARN_ON_ONCE_PAGE() macro. Since readahead page is charged on memcg too, in theory we don't have to check this exception now. Before safely remove them all, add a warning for the unexpected !memcg. Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Michal Hocko Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc5..5d0767cb424a 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) + #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) @@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif -- cgit v1.2.3 From bec78efd0061365a76f88e498affd7106b256823 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 18 Dec 2020 14:01:35 -0800 Subject: mm/memcg: remove unused definitions Some definitions are left unused, just clean them. Link: https://lkml.kernel.org/r/20201108003834.12669-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Reviewed-by: Roman Gushchin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 118 --------------------------------------------- 1 file changed, 118 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 08ed57e02b73..196441f5dc99 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -913,41 +913,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, local_irq_restore(flags); } -/** - * mod_memcg_page_state - update page state statistics - * @page: the page - * @idx: page state item to account - * @val: number of pages (positive or negative) - * - * The @page must be locked or the caller must use lock_page_memcg() - * to prevent double accounting when the page is concurrently being - * moved to another memcg: - * - * lock_page(page) or lock_page_memcg(page) - * if (TestClearPageState(page)) - * mod_memcg_page_state(page, state, -1); - * unlock_page(page) or unlock_page_memcg(page) - * - * Kernel pages are an exception to this, since they'll never move. - */ -static inline void __mod_memcg_page_state(struct page *page, - int idx, int val) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - __mod_memcg_state(memcg, idx, val); -} - -static inline void mod_memcg_page_state(struct page *page, - int idx, int val) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - mod_memcg_state(memcg, idx, val); -} - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1395,18 +1360,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, { } -static inline void __mod_memcg_page_state(struct page *page, - int idx, - int nr) -{ -} - -static inline void mod_memcg_page_state(struct page *page, - int idx, - int nr) -{ -} - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1479,34 +1432,6 @@ static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) } #endif /* CONFIG_MEMCG */ -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __inc_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - __mod_memcg_state(memcg, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __dec_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - __mod_memcg_state(memcg, idx, -1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __inc_memcg_page_state(struct page *page, - int idx) -{ - __mod_memcg_page_state(page, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __dec_memcg_page_state(struct page *page, - int idx) -{ - __mod_memcg_page_state(page, idx, -1); -} - static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); @@ -1517,34 +1442,6 @@ static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) __mod_lruvec_kmem_state(p, idx, -1); } -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void inc_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - mod_memcg_state(memcg, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void dec_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - mod_memcg_state(memcg, idx, -1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void inc_memcg_page_state(struct page *page, - int idx) -{ - mod_memcg_page_state(page, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void dec_memcg_page_state(struct page *page, - int idx) -{ - mod_memcg_page_state(page, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; @@ -1733,21 +1630,6 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) __memcg_kmem_uncharge_page(page, order); } -static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, - unsigned int nr_pages) -{ - if (memcg_kmem_enabled()) - return __memcg_kmem_charge(memcg, gfp, nr_pages); - return 0; -} - -static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, - unsigned int nr_pages) -{ - if (memcg_kmem_enabled()) - __memcg_kmem_uncharge(memcg, nr_pages); -} - /* * A helper for accessing memcg's kmem_id, used for getting * corresponding LRU lists. -- cgit v1.2.3 From 9a1ac2288cf16f9406ca54ef221bfcf262393b15 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 18 Dec 2020 14:01:41 -0800 Subject: mm/memcontrol:rewrite mem_cgroup_page_lruvec() mem_cgroup_page_lruvec() in memcontrol.c and mem_cgroup_lruvec() in memcontrol.h is very similar except for the param(page and memcg) which also can be convert to each other. So rewrite mem_cgroup_page_lruvec() with mem_cgroup_lruvec(). [alex.shi@linux.alibaba.com: add missed warning in mem_cgroup_lruvec] Link: https://lkml.kernel.org/r/94f17bb7-ec61-5b72-3555-fabeb5a4d73b@linux.alibaba.com [lstoakes@gmail.com: warn on missing memcg on mem_cgroup_page_lruvec()] Link: https://lkml.kernel.org/r/20201125112202.387009-1-lstoakes@gmail.com Link: https://lkml.kernel.org/r/20201108143731.GA74138@rlk Signed-off-by: Hui Su Signed-off-by: Alex Shi Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Vladimir Davydov Cc: Yafang Shao Cc: Chris Down Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 196441f5dc99..d827bd7f3bfe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -620,9 +620,10 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec + * @pgdat: pglist_data * * Returns the lru list vector holding pages for a given @memcg & - * @node combination. This can be the node lruvec, if the memory + * @pgdat combination. This can be the node lruvec, if the memory * controller is disabled. */ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, @@ -652,7 +653,21 @@ out: return lruvec; } -struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); +/** + * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page + * @page: the page + * @pgdat: pgdat of the page + * + * This function relies on page->mem_cgroup being stable. + */ +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, + struct pglist_data *pgdat) +{ + struct mem_cgroup *memcg = page_memcg(page); + + VM_WARN_ON_ONCE_PAGE(!memcg, page); + return mem_cgroup_lruvec(memcg, pgdat); +} static inline bool lruvec_holds_page_lru_lock(struct page *page, struct lruvec *lruvec) -- cgit v1.2.3 From b0a0c2615f6f199a656ed8549d7dce625d77aa77 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:41 -0800 Subject: epoll: wire up syscall epoll_pwait2 Split off from prev patch in the series that implements the syscall. Link: https://lkml.kernel.org/r/20201121144401.3727659-4-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Arnd Bergmann Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 6 ++++++ include/linux/syscalls.h | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 400c0941c8af..6e65be753603 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -537,6 +537,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_epoll_pwait2(int epfd, + struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index df0c3c74609e..f3929aff39cf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const sigset_t __user *sigmask, size_t sigsetsize); +asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long sys_dup(unsigned int fildes); -- cgit v1.2.3 From 3b1a4a8640876a966ab68ab4f561642e19674671 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:14 -0800 Subject: kasan: group vmalloc code This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Group all vmalloc-related function declarations in include/linux/kasan.h, and their implementations in mm/kasan/common.c. No functional changes. Link: https://lkml.kernel.org/r/80a6fdd29b039962843bd6cf22ce2643a7c8904e.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 30d343b4a40a..59538e795df4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -75,19 +75,6 @@ struct kasan_cache { int free_meta_offset; }; -/* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. - */ -#ifndef CONFIG_KASAN_VMALLOC -int kasan_module_alloc(void *addr, size_t size); -void kasan_free_shadow(const struct vm_struct *vm); -#else -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} -#endif - int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -156,9 +143,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} - static inline int kasan_add_zero_shadow(void *start, unsigned long size) { return 0; @@ -211,13 +195,16 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_VMALLOC + int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_poison_vmalloc(const void *start, unsigned long size); void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -#else + +#else /* CONFIG_KASAN_VMALLOC */ + static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { @@ -232,7 +219,25 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) {} -#endif + +#endif /* CONFIG_KASAN_VMALLOC */ + +#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) + +/* + * These functions provide a special case to support backing module + * allocations with real shadow memory. With KASAN vmalloc, the special + * case is unnecessary, as the work is handled in the generic case. + */ +int kasan_module_alloc(void *addr, size_t size); +void kasan_free_shadow(const struct vm_struct *vm); + +#else /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ + +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_free_shadow(const struct vm_struct *vm) {} + +#endif /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); -- cgit v1.2.3 From d5750edf6da759576f91ec2b57d5553985815b40 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:17 -0800 Subject: kasan: shadow declarations only for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Group shadow-related KASAN function declarations and only define them for the two existing software modes. No functional changes for software modes. Link: https://lkml.kernel.org/r/35126.1606402815@turing-police Link: https://lore.kernel.org/linux-arm-kernel/24105.1606397102@turing-police/ Link: https://lkml.kernel.org/r/e88d94eff94db883a65dca52e1736d80d28dd9bc.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Signed-off-by: Valdis Kletnieks Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon [valdis.kletnieks@vt.edu: fix build issue with asmlinkage] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 59538e795df4..7828436a3a99 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -11,7 +11,7 @@ struct task_struct; #ifdef CONFIG_KASAN -#include +#include #include /* kasan_data struct is used in KUnit tests for KASAN expected failures */ @@ -20,6 +20,20 @@ struct kunit_kasan_expectation { bool report_found; }; +#endif + +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + +#include + +/* Software KASAN implementations use shadow memory. */ + +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_SHADOW_INIT 0xFF +#else +#define KASAN_SHADOW_INIT 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; @@ -35,6 +49,23 @@ static inline void *kasan_mem_to_shadow(const void *addr) + KASAN_SHADOW_OFFSET; } +int kasan_add_zero_shadow(void *start, unsigned long size); +void kasan_remove_zero_shadow(void *start, unsigned long size); + +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline int kasan_add_zero_shadow(void *start, unsigned long size) +{ + return 0; +} +static inline void kasan_remove_zero_shadow(void *start, + unsigned long size) +{} + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +#ifdef CONFIG_KASAN + /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); @@ -75,9 +106,6 @@ struct kasan_cache { int free_meta_offset; }; -int kasan_add_zero_shadow(void *start, unsigned long size); -void kasan_remove_zero_shadow(void *start, unsigned long size); - size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { @@ -143,14 +171,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline int kasan_add_zero_shadow(void *start, unsigned long size) -{ - return 0; -} -static inline void kasan_remove_zero_shadow(void *start, - unsigned long size) -{} - static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } @@ -158,8 +178,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #ifdef CONFIG_KASAN_GENERIC -#define KASAN_SHADOW_INIT 0 - void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); @@ -174,8 +192,6 @@ static inline void kasan_record_aux_stack(void *ptr) {} #ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_SHADOW_INIT 0xFF - void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); -- cgit v1.2.3 From cebd0eb29acdfc2f5e44e5f356ffcd0c44f16b4a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:21 -0800 Subject: kasan: rename (un)poison_shadow to (un)poison_range This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. The new mode won't be using shadow memory. Rename external annotation kasan_unpoison_shadow() to kasan_unpoison_range(), and introduce internal functions (un)poison_range() (without kasan_ prefix). Co-developed-by: Marco Elver Link: https://lkml.kernel.org/r/fccdcaa13dc6b2211bf363d6c6d499279a54fe3a.1606161801.git.andreyknvl@google.com Signed-off-by: Marco Elver Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7828436a3a99..9740c06a04a1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -72,7 +72,7 @@ extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ extern void kasan_disable_current(void); -void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_unpoison_range(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); @@ -109,7 +109,7 @@ struct kasan_cache { size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { - kasan_unpoison_shadow(ptr, __ksize(ptr)); + kasan_unpoison_range(ptr, __ksize(ptr)); } size_t kasan_metadata_size(struct kmem_cache *cache); @@ -118,7 +118,7 @@ void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ -static inline void kasan_unpoison_shadow(const void *address, size_t size) {} +static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} -- cgit v1.2.3 From d73b49365ee65ac48074bdb5aa717bb4644dbbb7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:56 -0800 Subject: kasan, arm64: only use kasan_depth for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't use kasan_depth. Only define and use it when one of the software KASAN modes are enabled. No functional changes for software modes. Link: https://lkml.kernel.org/r/e16f15aeda90bc7fb4dfc2e243a14b74cc5c8219.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 18 +++++++++--------- include/linux/sched.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9740c06a04a1..b272960e8396 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,6 +52,12 @@ static inline void *kasan_mem_to_shadow(const void *addr) int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); +/* Enable reporting bugs after kasan_disable_current() */ +extern void kasan_enable_current(void); + +/* Disable reporting bugs for current task */ +extern void kasan_disable_current(void); + #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline int kasan_add_zero_shadow(void *start, unsigned long size) @@ -62,16 +68,13 @@ static inline void kasan_remove_zero_shadow(void *start, unsigned long size) {} +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN -/* Enable reporting bugs after kasan_disable_current() */ -extern void kasan_enable_current(void); - -/* Disable reporting bugs for current task */ -extern void kasan_disable_current(void); - void kasan_unpoison_range(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); @@ -122,9 +125,6 @@ static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} -static inline void kasan_enable_current(void) {} -static inline void kasan_disable_current(void) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} diff --git a/include/linux/sched.h b/include/linux/sched.h index 51d535b69bd6..6e3a5eeec509 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1234,7 +1234,7 @@ struct task_struct { u64 timer_slack_ns; u64 default_timer_slack_ns; -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif -- cgit v1.2.3 From 60a3a5fe950f4e6c02e9fc6676dc96de043ed743 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:03 -0800 Subject: kasan, arm64: rename kasan_init_tags and mark as __init Rename kasan_init_tags() to kasan_init_sw_tags() as the upcoming hardware tag-based KASAN mode will have its own initialization routine. Also similarly to kasan_init() mark kasan_init_tags() as __init. Link: https://lkml.kernel.org/r/71e52af72a09f4b50c8042f16101c60e50649fbb.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b272960e8396..d7042d129dc1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -192,7 +192,7 @@ static inline void kasan_record_aux_stack(void *ptr) {} #ifdef CONFIG_KASAN_SW_TAGS -void kasan_init_tags(void); +void __init kasan_init_sw_tags(void); void *kasan_reset_tag(const void *addr); @@ -201,7 +201,7 @@ bool kasan_report(unsigned long addr, size_t size, #else /* CONFIG_KASAN_SW_TAGS */ -static inline void kasan_init_tags(void) { } +static inline void kasan_init_sw_tags(void) { } static inline void *kasan_reset_tag(const void *addr) { -- cgit v1.2.3 From 0fea6e9af889f1a4e072f5de999e07fe6859fc88 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:06 -0800 Subject: kasan, arm64: expand CONFIG_KASAN checks Some #ifdef CONFIG_KASAN checks are only relevant for software KASAN modes (either related to shadow memory or compiler instrumentation). Expand those into CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS. Link: https://lkml.kernel.org/r/e6971e432dbd72bb897ff14134ebb7e169bdcf0c.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan-checks.h | 2 +- include/linux/kasan.h | 7 ++++--- include/linux/moduleloader.h | 3 ++- include/linux/string.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index ac6aba632f2d..ca5e89fb10d3 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h @@ -9,7 +9,7 @@ * even in compilation units that selectively disable KASAN, but must use KASAN * to validate access to an address. Never use these in header files! */ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bool __kasan_check_read(const volatile void *p, unsigned int size); bool __kasan_check_write(const volatile void *p, unsigned int size); #else diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d7042d129dc1..b1381ee6922a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -238,7 +238,8 @@ static inline void kasan_release_vmalloc(unsigned long start, #endif /* CONFIG_KASAN_VMALLOC */ -#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) /* * These functions provide a special case to support backing module @@ -248,12 +249,12 @@ static inline void kasan_release_vmalloc(unsigned long start, int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); -#else /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ +#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} -#endif /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ +#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 4fa67a8b2265..9e09d11ffe5b 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -96,7 +96,8 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) #include #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else diff --git a/include/linux/string.h b/include/linux/string.h index 1cd63a8a23ab..4fcfb56abcf5 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -267,7 +267,7 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); -- cgit v1.2.3 From 2e903b91479782b7dedd869603423d77e079d3de Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:10 -0800 Subject: kasan, arm64: implement HW_TAGS runtime Provide implementation of KASAN functions required for the hardware tag-based mode. Those include core functions for memory and pointer tagging (tags_hw.c) and bug reporting (report_tags_hw.c). Also adapt common KASAN code to support the new mode. Link: https://lkml.kernel.org/r/cfd0fbede579a6b66755c98c88c108e54f9c56bf.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 24 +++++++++++++++++------- include/linux/mm.h | 2 +- include/linux/page-flags-layout.h | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b1381ee6922a..d22ec4c9c1bd 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -190,25 +190,35 @@ static inline void kasan_record_aux_stack(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ -#ifdef CONFIG_KASAN_SW_TAGS - -void __init kasan_init_sw_tags(void); +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) void *kasan_reset_tag(const void *addr); bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -#else /* CONFIG_KASAN_SW_TAGS */ - -static inline void kasan_init_sw_tags(void) { } +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline void *kasan_reset_tag(const void *addr) { return (void *)addr; } -#endif /* CONFIG_KASAN_SW_TAGS */ +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/ + +#ifdef CONFIG_KASAN_SW_TAGS +void __init kasan_init_sw_tags(void); +#else +static inline void kasan_init_sw_tags(void) { } +#endif + +#ifdef CONFIG_KASAN_HW_TAGS +void kasan_init_hw_tags_cpu(void); +void __init kasan_init_hw_tags(void); +#else +static inline void kasan_init_hw_tags_cpu(void) { } +static inline void kasan_init_hw_tags(void) { } +#endif #ifdef CONFIG_KASAN_VMALLOC diff --git a/include/linux/mm.h b/include/linux/mm.h index 362579ad0758..024ec0a00c72 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1421,7 +1421,7 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) } #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) static inline u8 page_kasan_tag(const struct page *page) { return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index e200eef6a7fd..7d4ec26d8a3e 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -77,7 +77,7 @@ #define LAST_CPUPID_SHIFT 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #define KASAN_TAG_WIDTH 8 #else #define KASAN_TAG_WIDTH 0 -- cgit v1.2.3 From d56a9ef84bd0e1e8fba7a837ab12a4ec8476579f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:42 -0800 Subject: kasan, arm64: unpoison stack only with CONFIG_KASAN_STACK There's a config option CONFIG_KASAN_STACK that has to be enabled for KASAN to use stack instrumentation and perform validity checks for stack variables. There's no need to unpoison stack when CONFIG_KASAN_STACK is not enabled. Only call kasan_unpoison_task_stack[_below]() when CONFIG_KASAN_STACK is enabled. Note, that CONFIG_KASAN_STACK is an option that is currently always defined when CONFIG_KASAN is enabled, and therefore has to be tested with #if instead of #ifdef. Link: https://lkml.kernel.org/r/d09dd3f8abb388da397fd11598c5edeaa83fe559.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/If8a891e9fe01ea543e00b576852685afec0887e3 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Acked-by: Catalin Marinas Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d22ec4c9c1bd..e638255ce906 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -77,8 +77,6 @@ static inline void kasan_disable_current(void) {} void kasan_unpoison_range(const void *address, size_t size); -void kasan_unpoison_task_stack(struct task_struct *task); - void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -123,8 +121,6 @@ void kasan_restore_multi_shot(bool enabled); static inline void kasan_unpoison_range(const void *address, size_t size) {} -static inline void kasan_unpoison_task_stack(struct task_struct *task) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} @@ -176,6 +172,12 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ +#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +void kasan_unpoison_task_stack(struct task_struct *task); +#else +static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +#endif + #ifdef CONFIG_KASAN_GENERIC void kasan_cache_shrink(struct kmem_cache *cache); -- cgit v1.2.3 From c0054c565ae598073d6c27762c7d4f7de49a45d9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:52 -0800 Subject: kasan: inline kasan_reset_tag for tag-based modes Using kasan_reset_tag() currently results in a function call. As it's called quite often from the allocator code, this leads to a noticeable slowdown. Move it to include/linux/kasan.h and turn it into a static inline function. Also remove the now unneeded reset_tag() internal KASAN macro and use kasan_reset_tag() instead. Link: https://lkml.kernel.org/r/6940383a3a9dfb416134d338d8fac97a9ebb8686.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I4d2061acfe91d480a75df00b07c22d8494ef14b5 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e638255ce906..3bb72de94f90 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -194,7 +194,10 @@ static inline void kasan_record_aux_stack(void *ptr) {} #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -void *kasan_reset_tag(const void *addr); +static inline void *kasan_reset_tag(const void *addr) +{ + return (void *)arch_kasan_reset_tag(addr); +} bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -- cgit v1.2.3 From bffe690708c8b4fdb8f0bff8ff22b347fc6c709a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:59 -0800 Subject: kasan: open-code kasan_unpoison_slab There's the external annotation kasan_unpoison_slab() that is currently defined as static inline and uses kasan_unpoison_range(). Open-code this function in mempool.c. Otherwise with an upcoming change this function will result in an unnecessary function call. Link: https://lkml.kernel.org/r/131a6694a978a9a8b150187e539eecc8bcbf759b.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia7c8b659f79209935cbaab3913bf7f082cc43a0e Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 3bb72de94f90..7350de3e9fe4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -107,11 +107,6 @@ struct kasan_cache { int free_meta_offset; }; -size_t __ksize(const void *); -static inline void kasan_unpoison_slab(const void *ptr) -{ - kasan_unpoison_range(ptr, __ksize(ptr)); -} size_t kasan_metadata_size(struct kmem_cache *cache); bool kasan_save_enable_multi_shot(void); @@ -167,7 +162,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ -- cgit v1.2.3 From 34303244f2615add92076a4bf2d4f39323bde4f2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:10 -0800 Subject: kasan, mm: check kasan_enabled in annotations Declare the kasan_enabled static key in include/linux/kasan.h and in include/linux/mm.h and check it in all kasan annotations. This allows to avoid any slowdown caused by function calls when kasan_enabled is disabled. Link: https://lkml.kernel.org/r/9f90e3c0aa840dbb4833367c2335193299f69023.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I2589451d3c96c97abbcbf714baabe6161c6f153e Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 213 +++++++++++++++++++++++++++++++++++++++----------- include/linux/mm.h | 22 ++++-- 2 files changed, 182 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7350de3e9fe4..9176849c4934 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -2,6 +2,7 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H +#include #include struct kmem_cache; @@ -75,54 +76,176 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN -void kasan_unpoison_range(const void *address, size_t size); +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; -void kasan_alloc_pages(struct page *page, unsigned int order); -void kasan_free_pages(struct page *page, unsigned int order); +#ifdef CONFIG_KASAN_HW_TAGS +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} +#else +static inline bool kasan_enabled(void) +{ + return true; +} +#endif -void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, - slab_flags_t *flags); +void __kasan_unpoison_range(const void *addr, size_t size); +static __always_inline void kasan_unpoison_range(const void *addr, size_t size) +{ + if (kasan_enabled()) + __kasan_unpoison_range(addr, size); +} -void kasan_poison_slab(struct page *page); -void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); -void kasan_poison_object_data(struct kmem_cache *cache, void *object); -void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, - const void *object); +void __kasan_alloc_pages(struct page *page, unsigned int order); +static __always_inline void kasan_alloc_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_alloc_pages(page, order); +} -void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, - gfp_t flags); -void kasan_kfree_large(void *ptr, unsigned long ip); -void kasan_poison_kfree(void *ptr, unsigned long ip); -void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, - size_t size, gfp_t flags); -void * __must_check kasan_krealloc(const void *object, size_t new_size, - gfp_t flags); +void __kasan_free_pages(struct page *page, unsigned int order); +static __always_inline void kasan_free_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_free_pages(page, order); +} -void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags); -bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); +void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, + slab_flags_t *flags); +static __always_inline void kasan_cache_create(struct kmem_cache *cache, + unsigned int *size, slab_flags_t *flags) +{ + if (kasan_enabled()) + __kasan_cache_create(cache, size, flags); +} -struct kasan_cache { - int alloc_meta_offset; - int free_meta_offset; -}; +size_t __kasan_metadata_size(struct kmem_cache *cache); +static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) +{ + if (kasan_enabled()) + return __kasan_metadata_size(cache); + return 0; +} + +void __kasan_poison_slab(struct page *page); +static __always_inline void kasan_poison_slab(struct page *page) +{ + if (kasan_enabled()) + __kasan_poison_slab(page); +} + +void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); +static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) +{ + if (kasan_enabled()) + __kasan_unpoison_object_data(cache, object); +} + +void __kasan_poison_object_data(struct kmem_cache *cache, void *object); +static __always_inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) +{ + if (kasan_enabled()) + __kasan_poison_object_data(cache, object); +} + +void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, + const void *object); +static __always_inline void * __must_check kasan_init_slab_obj( + struct kmem_cache *cache, const void *object) +{ + if (kasan_enabled()) + return __kasan_init_slab_obj(cache, object); + return (void *)object; +} + +bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); +static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) +{ + if (kasan_enabled()) + return __kasan_slab_free(s, object, ip); + return false; +} + +void * __must_check __kasan_slab_alloc(struct kmem_cache *s, + void *object, gfp_t flags); +static __always_inline void * __must_check kasan_slab_alloc( + struct kmem_cache *s, void *object, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_slab_alloc(s, object, flags); + return object; +} + +void * __must_check __kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size, gfp_t flags); +static __always_inline void * __must_check kasan_kmalloc(struct kmem_cache *s, + const void *object, size_t size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_kmalloc(s, object, size, flags); + return (void *)object; +} -size_t kasan_metadata_size(struct kmem_cache *cache); +void * __must_check __kasan_kmalloc_large(const void *ptr, + size_t size, gfp_t flags); +static __always_inline void * __must_check kasan_kmalloc_large(const void *ptr, + size_t size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_kmalloc_large(ptr, size, flags); + return (void *)ptr; +} + +void * __must_check __kasan_krealloc(const void *object, + size_t new_size, gfp_t flags); +static __always_inline void * __must_check kasan_krealloc(const void *object, + size_t new_size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_krealloc(object, new_size, flags); + return (void *)object; +} + +void __kasan_poison_kfree(void *ptr, unsigned long ip); +static __always_inline void kasan_poison_kfree(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_poison_kfree(ptr, ip); +} + +void __kasan_kfree_large(void *ptr, unsigned long ip); +static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_kfree_large(ptr, ip); +} bool kasan_save_enable_multi_shot(void); void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ +static inline bool kasan_enabled(void) +{ + return false; +} static inline void kasan_unpoison_range(const void *address, size_t size) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} - static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} - +static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -133,36 +256,32 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } - -static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) { - return ptr; + return false; +} +static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) +{ + return object; } -static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} -static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { return (void *)object; } +static inline void *kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) +{ + return (void *)ptr; +} static inline void *kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { return (void *)object; } - -static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags) -{ - return object; -} -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip) -{ - return false; -} - -static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } +static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} +static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} #endif /* CONFIG_KASAN */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 024ec0a00c72..5299b90a6c40 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -31,6 +31,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1422,22 +1423,30 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) + static inline u8 page_kasan_tag(const struct page *page) { - return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + if (kasan_enabled()) + return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + return 0xff; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + if (kasan_enabled()) { + page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } } static inline void page_kasan_tag_reset(struct page *page) { - page_kasan_tag_set(page, 0xff); + if (kasan_enabled()) + page_kasan_tag_set(page, 0xff); } -#else + +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ + static inline u8 page_kasan_tag(const struct page *page) { return 0xff; @@ -1445,7 +1454,8 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { } static inline void page_kasan_tag_reset(struct page *page) { } -#endif + +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline struct zone *page_zone(const struct page *page) { -- cgit v1.2.3 From eeb3160c2419e0f1045537acac7b19cba64112f4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:13 -0800 Subject: kasan, mm: rename kasan_poison_kfree Rename kasan_poison_kfree() to kasan_slab_free_mempool() as it better reflects what this annotation does. Also add a comment that explains the PageSlab() check. No functional changes. Link: https://lkml.kernel.org/r/141675fb493555e984c5dca555e9d9f768c7bbaa.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I5026f87364e556b506ef1baee725144bb04b8810 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9176849c4934..6f0c5d9aa43f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -176,6 +176,13 @@ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } +void __kasan_slab_free_mempool(void *ptr, unsigned long ip); +static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_slab_free_mempool(ptr, ip); +} + void * __must_check __kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); static __always_inline void * __must_check kasan_slab_alloc( @@ -216,13 +223,6 @@ static __always_inline void * __must_check kasan_krealloc(const void *object, return (void *)object; } -void __kasan_poison_kfree(void *ptr, unsigned long ip); -static __always_inline void kasan_poison_kfree(void *ptr, unsigned long ip) -{ - if (kasan_enabled()) - __kasan_poison_kfree(ptr, ip); -} - void __kasan_kfree_large(void *ptr, unsigned long ip); static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) { @@ -261,6 +261,7 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, { return false; } +static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) { @@ -280,7 +281,6 @@ static inline void *kasan_krealloc(const void *object, size_t new_size, { return (void *)object; } -static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} #endif /* CONFIG_KASAN */ -- cgit v1.2.3 From e86f8b09f215e3755cd2d56930487dec2de02433 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:31 -0800 Subject: kasan, mm: allow cache merging with no metadata The reason cache merging is disabled with KASAN is because KASAN puts its metadata right after the allocated object. When the merged caches have slightly different sizes, the metadata ends up in different places, which KASAN doesn't support. It might be possible to adjust the metadata allocation algorithm and make it friendly to the cache merging code. Instead this change takes a simpler approach and allows merging caches when no metadata is present. Which is the case for hardware tag-based KASAN with kasan.mode=prod. Link: https://lkml.kernel.org/r/37497e940bfd4b32c0a93a702a9ae4cf061d5392.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia114847dfb2244f297d2cb82d592bf6a07455dba Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 6f0c5d9aa43f..5e0655fb2a6f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -82,17 +82,30 @@ struct kasan_cache { }; #ifdef CONFIG_KASAN_HW_TAGS + DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + static __always_inline bool kasan_enabled(void) { return static_branch_likely(&kasan_flag_enabled); } -#else + +#else /* CONFIG_KASAN_HW_TAGS */ + static inline bool kasan_enabled(void) { return true; } -#endif + +#endif /* CONFIG_KASAN_HW_TAGS */ + +slab_flags_t __kasan_never_merge(void); +static __always_inline slab_flags_t kasan_never_merge(void) +{ + if (kasan_enabled()) + return __kasan_never_merge(); + return 0; +} void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) @@ -239,6 +252,10 @@ static inline bool kasan_enabled(void) { return false; } +static inline slab_flags_t kasan_never_merge(void) +{ + return 0; +} static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} -- cgit v1.2.3