From f5c27da4e3c8a2e42fb4f41a0c685debcb9af294 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Nov 2022 16:57:44 +0100 Subject: HID: initial BPF implementation Declare an entry point that can use fmod_ret BPF programs, and also an API to access and change the incoming data. A simpler implementation would consist in just calling hid_bpf_device_event() for any incoming event and let users deal with the fact that they will be called for any event of any device. The goal of HID-BPF is to partially replace drivers, so this situation can be problematic because we might have programs which will step on each other toes. For that, we add a new API hid_bpf_attach_prog() that can be called from a syscall and we manually deal with a jump table in hid-bpf. Whenever we add a program to the jump table (in other words, when we attach a program to a HID device), we keep the number of time we added this program in the jump table so we can release it whenever there are no other users. HID devices have an RCU protected list of available programs in the jump table, and those programs are called one after the other thanks to bpf_tail_call(). To achieve the detection of users losing their fds on the programs we attached, we add 2 tracing facilities on bpf_prog_release() (for when a fd is closed) and bpf_free_inode() (for when a pinned program gets unpinned). Reviewed-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 5 +++ include/linux/hid_bpf.h | 117 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 include/linux/hid_bpf.h (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 8677ae38599e..cd3c52fae7b1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -26,6 +26,7 @@ #include #include #include +#include /* * We parse each description item into this structure. Short items data @@ -651,6 +652,10 @@ struct hid_device { /* device report descriptor */ wait_queue_head_t debug_wait; unsigned int id; /* system unique id */ + +#ifdef CONFIG_BPF + struct hid_bpf bpf; /* hid-bpf data */ +#endif /* CONFIG_BPF */ }; #define to_hid_device(pdev) \ diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h new file mode 100644 index 000000000000..de3fb1c376d2 --- /dev/null +++ b/include/linux/hid_bpf.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __HID_BPF_H +#define __HID_BPF_H + +#include +#include + +struct hid_device; + +/* + * The following is the user facing HID BPF API. + * + * Extra care should be taken when editing this part, as + * it might break existing out of the tree bpf programs. + */ + +/** + * struct hid_bpf_ctx - User accessible data for all HID programs + * + * ``data`` is not directly accessible from the context. We need to issue + * a call to ``hid_bpf_get_data()`` in order to get a pointer to that field. + * + * All of these fields are currently read-only. + * + * @index: program index in the jump table. No special meaning (a smaller index + * doesn't mean the program will be executed before another program with + * a bigger index). + * @hid: the ``struct hid_device`` representing the device itself + * @report_type: used for ``hid_bpf_device_event()`` + * @size: Valid data in the data field. + * + * Programs can get the available valid size in data by fetching this field. + */ +struct hid_bpf_ctx { + __u32 index; + const struct hid_device *hid; + enum hid_report_type report_type; + __s32 size; +}; + +/** + * enum hid_bpf_attach_flags - flags used when attaching a HIF-BPF program + * + * @HID_BPF_FLAG_NONE: no specific flag is used, the kernel choses where to + * insert the program + * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program + * currently attached to the device. This doesn't + * guarantee that this program will always be first + * @HID_BPF_FLAG_MAX: sentinel value, not to be used by the callers + */ +enum hid_bpf_attach_flags { + HID_BPF_FLAG_NONE = 0, + HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), + HID_BPF_FLAG_MAX, +}; + +/* Following functions are tracepoints that BPF programs can attach to */ +int hid_bpf_device_event(struct hid_bpf_ctx *ctx); + +/* Following functions are kfunc that we export to BPF programs */ +/* only available in tracing */ +__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); + +/* only available in syscall */ +int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); + +/* + * Below is HID internal + */ + +/* internal function to call eBPF programs, not to be used by anybody */ +int __hid_bpf_tail_call(struct hid_bpf_ctx *ctx); + +#define HID_BPF_MAX_PROGS_PER_DEV 64 +#define HID_BPF_FLAG_MASK (((HID_BPF_FLAG_MAX - 1) << 1) - 1) + +/* types of HID programs to attach to */ +enum hid_bpf_prog_type { + HID_BPF_PROG_TYPE_UNDEF = -1, + HID_BPF_PROG_TYPE_DEVICE_EVENT, /* an event is emitted from the device */ + HID_BPF_PROG_TYPE_MAX, +}; + +struct hid_bpf_ops { + struct module *owner; + struct bus_type *bus_type; +}; + +extern struct hid_bpf_ops *hid_bpf_ops; + +struct hid_bpf_prog_list { + u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; + u8 prog_cnt; +}; + +/* stored in each device */ +struct hid_bpf { + struct hid_bpf_prog_list __rcu *progs[HID_BPF_PROG_TYPE_MAX]; /* attached BPF progs */ + bool destroyed; /* prevents the assignment of any progs */ + + spinlock_t progs_lock; /* protects RCU update of progs */ +}; + +#ifdef CONFIG_HID_BPF +int dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, + u32 size, int interrupt); +void hid_bpf_destroy_device(struct hid_device *hid); +void hid_bpf_device_init(struct hid_device *hid); +#else /* CONFIG_HID_BPF */ +static inline int dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, + u8 *data, u32 size, int interrupt) { return 0; } +static inline void hid_bpf_destroy_device(struct hid_device *hid) {} +static inline void hid_bpf_device_init(struct hid_device *hid) {} +#endif /* CONFIG_HID_BPF */ + +#endif /* __HID_BPF_H */ -- cgit v1.2.3 From 658ee5a64fcfbbf758447fa3af425729eaabb0dc Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Nov 2022 16:57:47 +0100 Subject: HID: bpf: allocate data memory for device_event BPF programs We need to also be able to change the size of the report. Reducing it is easy, because we already have the incoming buffer that is big enough, but extending it is harder. Pre-allocate a buffer that is big enough to handle all reports of the device, and use that as the primary buffer for BPF programs. To be able to change the size of the buffer, we change the device_event API and request it to return the size of the buffer. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid_bpf.h | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index de3fb1c376d2..b3de462ef3a6 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -28,15 +28,32 @@ struct hid_device; * a bigger index). * @hid: the ``struct hid_device`` representing the device itself * @report_type: used for ``hid_bpf_device_event()`` + * @allocated_size: Allocated size of data. + * + * This is how much memory is available and can be requested + * by the HID program. + * Note that for ``HID_BPF_RDESC_FIXUP``, that memory is set to + * ``4096`` (4 KB) * @size: Valid data in the data field. * * Programs can get the available valid size in data by fetching this field. + * Programs can also change this value by returning a positive number in the + * program. + * To discard the event, return a negative error code. + * + * ``size`` must always be less or equal than ``allocated_size`` (it is enforced + * once all BPF programs have been run). + * @retval: Return value of the previous program. */ struct hid_bpf_ctx { __u32 index; const struct hid_device *hid; + __u32 allocated_size; enum hid_report_type report_type; - __s32 size; + union { + __s32 retval; + __s32 size; + }; }; /** @@ -96,6 +113,12 @@ struct hid_bpf_prog_list { /* stored in each device */ struct hid_bpf { + u8 *device_data; /* allocated when a bpf program of type + * SEC(f.../hid_bpf_device_event) has been attached + * to this HID device + */ + u32 allocated_data; + struct hid_bpf_prog_list __rcu *progs[HID_BPF_PROG_TYPE_MAX]; /* attached BPF progs */ bool destroyed; /* prevents the assignment of any progs */ @@ -103,13 +126,17 @@ struct hid_bpf { }; #ifdef CONFIG_HID_BPF -int dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, - u32 size, int interrupt); +u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, + u32 *size, int interrupt); +int hid_bpf_connect_device(struct hid_device *hdev); +void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); void hid_bpf_device_init(struct hid_device *hid); #else /* CONFIG_HID_BPF */ -static inline int dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt) { return 0; } +static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, + u8 *data, u32 *size, int interrupt) { return NULL; } +static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } +static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline void hid_bpf_device_init(struct hid_device *hid) {} #endif /* CONFIG_HID_BPF */ -- cgit v1.2.3 From 91a7f802d1852f60139712bdcfa98db547ce0531 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Nov 2022 16:57:49 +0100 Subject: HID: bpf: introduce hid_hw_request() This function can not be called under IRQ, thus it is only available while in SEC("syscall"). For consistency, this function requires a HID-BPF context to work with, and so we also provide a helper to create one based on the HID unique ID. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires -- changes in v12: - variable dereferenced before check 'ctx' |Reported-by: kernel test robot |Reported-by: Dan Carpenter no changes in v11 no changes in v10 changes in v9: - fixed kfunc declaration aaccording to latest upstream changes no changes in v8 changes in v7: - hid_bpf_allocate_context: remove unused variable - ensures buf is not NULL changes in v6: - rename parameter size into buf__sz to teach the verifier about the actual buffer size used by the call - remove the allocated data in the user created context, it's not used new-ish in v5 Signed-off-by: Jiri Kosina --- include/linux/hid_bpf.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index b3de462ef3a6..1d24b72059bc 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -76,11 +76,15 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); /* Following functions are kfunc that we export to BPF programs */ -/* only available in tracing */ +/* available everywhere in HID-BPF */ __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); /* only available in syscall */ int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); +int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, + enum hid_report_type rtype, enum hid_class_request reqtype); +struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); +void hid_bpf_release_context(struct hid_bpf_ctx *ctx); /* * Below is HID internal @@ -99,7 +103,14 @@ enum hid_bpf_prog_type { HID_BPF_PROG_TYPE_MAX, }; +struct hid_report_enum; + struct hid_bpf_ops { + struct hid_report *(*hid_get_report)(struct hid_report_enum *report_enum, const u8 *data); + int (*hid_hw_raw_request)(struct hid_device *hdev, + unsigned char reportnum, __u8 *buf, + size_t len, enum hid_report_type rtype, + enum hid_class_request reqtype); struct module *owner; struct bus_type *bus_type; }; -- cgit v1.2.3 From ad190df11a024c1214fbc8dcaab72c592c79d9b5 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Nov 2022 16:57:51 +0100 Subject: HID: bpf: allow to change the report descriptor Add a new tracepoint hid_bpf_rdesc_fixup() so we can trigger a report descriptor fixup in the bpf world. Whenever the program gets attached/detached, the device is reconnected meaning that userspace will see it disappearing and reappearing with the new report descriptor. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid_bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 1d24b72059bc..9b11f8f25ad5 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -74,6 +74,7 @@ enum hid_bpf_attach_flags { /* Following functions are tracepoints that BPF programs can attach to */ int hid_bpf_device_event(struct hid_bpf_ctx *ctx); +int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); /* Following functions are kfunc that we export to BPF programs */ /* available everywhere in HID-BPF */ @@ -100,6 +101,7 @@ int __hid_bpf_tail_call(struct hid_bpf_ctx *ctx); enum hid_bpf_prog_type { HID_BPF_PROG_TYPE_UNDEF = -1, HID_BPF_PROG_TYPE_DEVICE_EVENT, /* an event is emitted from the device */ + HID_BPF_PROG_TYPE_RDESC_FIXUP, HID_BPF_PROG_TYPE_MAX, }; @@ -143,6 +145,7 @@ int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); void hid_bpf_device_init(struct hid_device *hid); +u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt) { return NULL; } @@ -150,6 +153,11 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline void hid_bpf_device_init(struct hid_device *hid) {} +static inline u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size) +{ + return kmemdup(rdesc, *size, GFP_KERNEL); +} + #endif /* CONFIG_HID_BPF */ #endif /* __HID_BPF_H */ -- cgit v1.2.3 From 576e619ba4a47329bf55ada2d184e72529793fbd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 16 Nov 2022 11:31:10 +0100 Subject: HID: bpf: return non NULL data pointer when CONFIG_HID_BPF is not set dispatch_hid_bpf_device_event() is supposed to return either an error, or a valid pointer to memory containing the data. Returning NULL simply makes a segfault when CONFIG_HID_BPF is not set for any processed event. Fixes: 658ee5a64fcfbbf ("HID: bpf: allocate data memory for device_event BPF program") Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid_bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 9b11f8f25ad5..3ca85ab91325 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -148,7 +148,7 @@ void hid_bpf_device_init(struct hid_device *hid); u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 *size, int interrupt) { return NULL; } + u8 *data, u32 *size, int interrupt) { return data; } static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -- cgit v1.2.3 From a189b2ee938f6b15ad9f95bdef63f95a3a092418 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 17 Nov 2022 12:47:29 +0100 Subject: fbdev: Make fb_modesetting_disabled() static inline Make fb_modesetting_disabled() a static-inline function when it is defined in the header file. Avoid the linker error shown below. ld: drivers/video/fbdev/core/fbmon.o: in function `fb_modesetting_disabled': fbmon.c:(.text+0x1e4): multiple definition of `fb_modesetting_disabled'; drivers/video/fbdev/core/fbmem.o:fbmem.c:(.text+0x1bac): first defined here A bug report is at [1]. Reported-by: Stephen Rothwell Reported-by: kernel test robot Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Fixes: 0ba2fa8cbd29 ("fbdev: Add support for the nomodeset kernel parameter") Cc: Javier Martinez Canillas Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Link: https://lore.kernel.org/dri-devel/20221117183214.2473e745@canb.auug.org.au/T/#u # 1 Link: https://patchwork.freedesktop.org/patch/msgid/20221117114729.7570-1-tzimmermann@suse.de --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 8dc9635f5bc1..a2a52f730989 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -807,7 +807,7 @@ extern int fb_find_mode(struct fb_var_screeninfo *var, #if defined(CONFIG_VIDEO_NOMODESET) bool fb_modesetting_disabled(const char *drvname); #else -bool fb_modesetting_disabled(const char *drvname) +static inline bool fb_modesetting_disabled(const char *drvname) { return false; } -- cgit v1.2.3 From 8b83e1a455382dc667898a525a93f4eb6716cc41 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 18 Nov 2022 14:35:34 +0100 Subject: Revert "drm/fb-helper: Schedule deferred-I/O worker after writing to framebuffer" This reverts commit 7f5cc4a3e5e4c5a38e5748defc952e45278f7a70. Needed to restore the fbdev damage worker. There have been bug reports about locking order [1] and incorrectly takens branches. [2] Restore the damage worker until these problems have been resovled. Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Link: https://intel-gfx-ci.01.org/tree/drm-tip/fi-kbl-8809g.html # 1 Link: https://lore.kernel.org/dri-devel/20221115115819.23088-6-tzimmermann@suse.de/T/#m06eedc0a468940e4cbbd14ca026733b639bc445a # 2 Link: https://patchwork.freedesktop.org/patch/msgid/20221118133535.9739-3-tzimmermann@suse.de --- include/linux/fb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index a2a52f730989..64889a2b783f 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -663,7 +663,6 @@ extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); -extern void fb_deferred_io_schedule_flush(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From b56ffa583350f605446d78cb4163114e4d1ac60c Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 23 Nov 2022 19:35:18 +0000 Subject: dma-buf: A collection of typo and documentation fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've been collecting these typo fixes for a while and it feels like time to send them in. Signed-off-by: T.J. Mercier Reviewed-by: Randy Dunlap Acked-by: Christian König Reviewed-by: Tommaso Merciai Link: https://patchwork.freedesktop.org/patch/msgid/20221123193519.3948105-1-tjmercier@google.com Signed-off-by: Christian König --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 6fa8d4e29719..3f31baa3293f 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -356,7 +356,7 @@ struct dma_buf { */ const char *name; - /** @name_lock: Spinlock to protect name acces for read access. */ + /** @name_lock: Spinlock to protect name access for read access. */ spinlock_t name_lock; /** @@ -393,7 +393,7 @@ struct dma_buf { * anything the userspace API considers write access. * * - Drivers may just always add a write fence, since that only - * causes unecessarily synchronization, but no correctness issues. + * causes unnecessary synchronization, but no correctness issues. * * - Some drivers only expose a synchronous userspace API with no * pipelining across drivers. These do not set any fences for their -- cgit v1.2.3 From d878d3dc126db05b075147456644bd2d2ab1fb5e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 9 Dec 2022 16:07:22 +0100 Subject: wifi: mt76: mt7915: get rid of wed rx_buf_ring page_frag_cache Since wed rx_buf_ring page_frag_cache is no longer used in a hot path, remove it and rely on page allocation APIs in mt7915_mmio_wed_init_rx_buf() and mt7915_mmio_wed_release_rx_buf() Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- include/linux/soc/mediatek/mtk_wed.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index beb190449704..7293259dc1f8 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -103,7 +103,6 @@ struct mtk_wed_device { struct { int size; - struct page_frag_cache rx_page; struct mtk_rxbm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; -- cgit v1.2.3 From 78aa1cc9404399a15d2a1205329c6a06236f5378 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:28 +0100 Subject: bpf: Add struct for bin_args arg in bpf_bprintf_prepare Adding struct bpf_bprintf_data to hold bin_args argument for bpf_bprintf_prepare function. We will add another return argument to bpf_bprintf_prepare and pass the struct to bpf_bprintf_cleanup for proper cleanup in following changes. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-2-jolsa@kernel.org --- include/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3de24cfb7a3d..cc390ba32e70 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2796,8 +2796,13 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +struct bpf_bprintf_data { + u32 *bin_args; + bool get_bin_args; +}; + int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); + u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(void); /* the implementation of the opaque uapi struct bpf_dynptr */ -- cgit v1.2.3 From f19a4050455aad847fb93f18dc1fe502eb60f989 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:29 +0100 Subject: bpf: Do cleanup in bpf_bprintf_cleanup only when needed Currently we always cleanup/decrement bpf_bprintf_nest_level variable in bpf_bprintf_cleanup if it's > 0. There's possible scenario where this could cause a problem, when bpf_bprintf_prepare does not get bin_args buffer (because num_args is 0) and following bpf_bprintf_cleanup call decrements bpf_bprintf_nest_level variable, like: in task context: bpf_bprintf_prepare(num_args != 0) increments 'bpf_bprintf_nest_level = 1' -> first irq : bpf_bprintf_prepare(num_args == 0) bpf_bprintf_cleanup decrements 'bpf_bprintf_nest_level = 0' -> second irq: bpf_bprintf_prepare(num_args != 0) bpf_bprintf_nest_level = 1 gets same buffer as task context above Adding check to bpf_bprintf_cleanup and doing the real cleanup only if we got bin_args data in the first place. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-3-jolsa@kernel.org --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc390ba32e70..656879385fbf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2803,7 +2803,7 @@ struct bpf_bprintf_data { int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); -void bpf_bprintf_cleanup(void); +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { -- cgit v1.2.3 From e2bb9e01d589f7fa82573aedd2765ff9b277816a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2022 22:44:30 +0100 Subject: bpf: Remove trace_printk_lock Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer guarded with trace_printk_lock spin lock. The spin lock contention causes issues with bpf programs attached to contention_begin tracepoint [1][2]. Andrii suggested we could get rid of the contention by using trylock, but we could actually get rid of the spinlock completely by using percpu buffers the same way as for bin_args in bpf_bprintf_prepare function. Adding new return 'buf' argument to struct bpf_bprintf_data and making bpf_bprintf_prepare to return also the buffer for printk helpers. [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/ Reported-by: Hao Sun Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221215214430.1336195-4-jolsa@kernel.org --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 656879385fbf..5fec2d1be6d7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2795,10 +2795,13 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_BPRINTF_BUF 1024 struct bpf_bprintf_data { u32 *bin_args; + char *buf; bool get_bin_args; + bool get_buf; }; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, -- cgit v1.2.3 From 98c062e8245199fa9121141a0bf1035dc45ae90e Mon Sep 17 00:00:00 2001 From: Philipp Jungkamp Date: Fri, 25 Nov 2022 00:38:38 +0100 Subject: HID: hid-sensor-custom: Allow more custom iio sensors The known LUID table for established/known custom HID sensors was limited to sensors with "INTEL" as manufacturer. But some vendors such as Lenovo also include fairly standard iio sensors (e.g. ambient light) in their custom sensors. Expand the known custom sensors table by a tag used for the platform device name and match sensors based on the LUID as well as optionally on model and manufacturer properties. Signed-off-by: Philipp Jungkamp Reviewed-by: Jonathan Cameron Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index ac631159403a..13b1e65fbdcc 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -132,6 +132,7 @@ #define HID_USAGE_SENSOR_PROP_FRIENDLY_NAME 0x200301 #define HID_USAGE_SENSOR_PROP_SERIAL_NUM 0x200307 #define HID_USAGE_SENSOR_PROP_MANUFACTURER 0x200305 +#define HID_USAGE_SENSOR_PROP_MODEL 0x200306 #define HID_USAGE_SENSOR_PROP_REPORT_INTERVAL 0x20030E #define HID_USAGE_SENSOR_PROP_SENSITIVITY_ABS 0x20030F #define HID_USAGE_SENSOR_PROP_SENSITIVITY_RANGE_PCT 0x200310 -- cgit v1.2.3 From a608dc1c06397dc50ab773498433432fb5938f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 24 Nov 2022 18:59:37 +0100 Subject: HID: input: map battery system charging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HID descriptors with Battery System (0x85) Charging (0x44) usage are ignored and POWER_SUPPLY_STATUS_DISCHARGING is always reported to user space, even when the device is charging. Map this usage and when it is reported set the right charging status. In addition, add KUnit tests to make sure that the charging status is correctly set and reported. They can be run with the usual command: $ ./tools/testing/kunit/kunit.py run --kunitconfig=drivers/hid Signed-off-by: José Expósito Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 8677ae38599e..1eb5408599cd 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -312,6 +312,7 @@ struct hid_item { #define HID_DG_LATENCYMODE 0x000d0060 #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 +#define HID_BAT_CHARGING 0x00850044 #define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076 @@ -611,6 +612,7 @@ struct hid_device { /* device report descriptor */ __s32 battery_max; __s32 battery_report_type; __s32 battery_report_id; + __s32 battery_charge_status; enum hid_battery_status battery_status; bool battery_avoid_query; ktime_t battery_ratelimit_time; -- cgit v1.2.3 From ee3e3ac05c2631ce1f12d88c9cc9a092f8fe947a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 22 Nov 2022 15:39:05 -0500 Subject: rseq: Introduce extensible rseq ABI Introduce the extensible rseq ABI, where the feature size supported by the kernel and the required alignment are communicated to user-space through ELF auxiliary vectors. This allows user-space to call rseq registration with a rseq_len of either 32 bytes for the original struct rseq size (which includes padding), or larger. If rseq_len is larger than 32 bytes, then it must be large enough to contain the feature size communicated to user-space through ELF auxiliary vectors. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20221122203932.231377-4-mathieu.desnoyers@efficios.com --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 853d08f7562b..e0bc020a63a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1302,6 +1302,7 @@ struct task_struct { #ifdef CONFIG_RSEQ struct rseq __user *rseq; + u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically @@ -2352,10 +2353,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { if (clone_flags & CLONE_VM) { t->rseq = NULL; + t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } else { t->rseq = current->rseq; + t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; } @@ -2364,6 +2367,7 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) static inline void rseq_execve(struct task_struct *t) { t->rseq = NULL; + t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } -- cgit v1.2.3 From af7f588d8f7355bc4298dd1962d7826358fc95f0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 22 Nov 2022 15:39:09 -0500 Subject: sched: Introduce per-memory-map concurrency ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This feature allows the scheduler to expose a per-memory map concurrency ID to user-space. This concurrency ID is within the possible cpus range, and is temporarily (and uniquely) assigned while threads are actively running within a memory map. If a memory map has fewer threads than cores, or is limited to run on few cores concurrently through sched affinity or cgroup cpusets, the concurrency IDs will be values close to 0, thus allowing efficient use of user-space memory for per-cpu data structures. This feature is meant to be exposed by a new rseq thread area field. The primary purpose of this feature is to do the heavy-lifting needed by memory allocators to allow them to use per-cpu data structures efficiently in the following situations: - Single-threaded applications, - Multi-threaded applications on large systems (many cores) with limited cpu affinity mask, - Multi-threaded applications on large systems (many cores) with restricted cgroup cpuset per container. One of the key concern from scheduler maintainers is the overhead associated with additional spin locks or atomic operations in the scheduler fast-path. This is why the following optimization is implemented. On context switch between threads belonging to the same memory map, transfer the mm_cid from prev to next without any atomic ops. This takes care of use-cases involving frequent context switch between threads belonging to the same memory map. Additional optimizations can be done if the spin locks added when context switching between threads belonging to different memory maps end up being a performance bottleneck. Those are left out of this patch though. A performance impact would have to be clearly demonstrated to justify the added complexity. The credit goes to Paul Turner (Google) for the original virtual cpu id idea. This feature is implemented based on the discussions with Paul Turner and Peter Oskolkov (Google), but I took the liberty to implement scheduler fast-path optimizations and my own NUMA-awareness scheme. The rumor has it that Google have been running a rseq vcpu_id extension internally in production for a year. The tcmalloc source code indeed has comments hinting at a vcpu_id prototype extension to the rseq system call [1]. The following benchmarks do not show any significant overhead added to the scheduler context switch by this feature: * perf bench sched messaging (process) Baseline: 86.5±0.3 ms With mm_cid: 86.7±2.6 ms * perf bench sched messaging (threaded) Baseline: 84.3±3.0 ms With mm_cid: 84.7±2.6 ms * hackbench (process) Baseline: 82.9±2.7 ms With mm_cid: 82.9±2.9 ms * hackbench (threaded) Baseline: 85.2±2.6 ms With mm_cid: 84.4±2.9 ms [1] https://github.com/google/tcmalloc/blob/master/tcmalloc/internal/linux_syscall_support.h#L26 Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20221122203932.231377-8-mathieu.desnoyers@efficios.com --- include/linux/mm.h | 25 +++++++++++++++++++++++++ include/linux/mm_types.h | 43 ++++++++++++++++++++++++++++++++++++++++++- include/linux/sched.h | 5 +++++ 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f3f196e4d66d..cf008c26a883 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1976,6 +1976,31 @@ struct zap_details { /* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ #define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) +#ifdef CONFIG_SCHED_MM_CID +void sched_mm_cid_before_execve(struct task_struct *t); +void sched_mm_cid_after_execve(struct task_struct *t); +void sched_mm_cid_fork(struct task_struct *t); +void sched_mm_cid_exit_signals(struct task_struct *t); +static inline int task_mm_cid(struct task_struct *t) +{ + return t->mm_cid; +} +#else +static inline void sched_mm_cid_before_execve(struct task_struct *t) { } +static inline void sched_mm_cid_after_execve(struct task_struct *t) { } +static inline void sched_mm_cid_fork(struct task_struct *t) { } +static inline void sched_mm_cid_exit_signals(struct task_struct *t) { } +static inline int task_mm_cid(struct task_struct *t) +{ + /* + * Use the processor id as a fall-back when the mm cid feature is + * disabled. This provides functional per-cpu data structure accesses + * in user-space, althrough it won't provide the memory usage benefits. + */ + return raw_smp_processor_id(); +} +#endif + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3b8475007734..1c3bf76063d2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -645,7 +645,18 @@ struct mm_struct { * &struct mm_struct is freed. */ atomic_t mm_count; - +#ifdef CONFIG_SCHED_MM_CID + /** + * @cid_lock: Protect cid bitmap updates vs lookups. + * + * Prevent situations where updates to the cid bitmap happen + * concurrently with lookups. Those can lead to situations + * where a lookup cannot find a free bit simply because it was + * unlucky enough to load, non-atomically, bitmap words as they + * were being concurrently updated by the updaters. + */ + raw_spinlock_t cid_lock; +#endif #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif @@ -909,6 +920,36 @@ static inline void vma_iter_init(struct vma_iterator *vmi, vmi->mas.node = MAS_START; } +#ifdef CONFIG_SCHED_MM_CID +/* Accessor for struct mm_struct's cidmask. */ +static inline cpumask_t *mm_cidmask(struct mm_struct *mm) +{ + unsigned long cid_bitmap = (unsigned long)mm; + + cid_bitmap += offsetof(struct mm_struct, cpu_bitmap); + /* Skip cpu_bitmap */ + cid_bitmap += cpumask_size(); + return (struct cpumask *)cid_bitmap; +} + +static inline void mm_init_cid(struct mm_struct *mm) +{ + raw_spin_lock_init(&mm->cid_lock); + cpumask_clear(mm_cidmask(mm)); +} + +static inline unsigned int mm_cid_size(void) +{ + return cpumask_size(); +} +#else /* CONFIG_SCHED_MM_CID */ +static inline void mm_init_cid(struct mm_struct *mm) { } +static inline unsigned int mm_cid_size(void) +{ + return 0; +} +#endif /* CONFIG_SCHED_MM_CID */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/sched.h b/include/linux/sched.h index e0bc020a63a9..4df2b3e76b30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1311,6 +1311,11 @@ struct task_struct { unsigned long rseq_event_mask; #endif +#ifdef CONFIG_SCHED_MM_CID + int mm_cid; /* Current cid in mm */ + int mm_cid_active; /* Whether cid bitmap is active */ +#endif + struct tlbflush_unmap_batch tlb_ubc; union { -- cgit v1.2.3 From c89970202a1153b2fc230e89f90c180bd5bcbcef Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 20 Dec 2022 17:07:05 +1000 Subject: cputime: remove cputime_to_nsecs fallback The archs that use cputime_to_nsecs() internally provide their own definition and don't need the fallback. cputime_to_usecs() unused except in this fallback, and is not defined anywhere. This removes the final remnant of the cputime_t code from the kernel. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Gordeev Link: https://lore.kernel.org/r/20221220070705.2958959-1-npiggin@gmail.com --- include/linux/sched/cputime.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index ce3c58286062..5f8fd5b24a2e 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -8,15 +8,6 @@ * cputime accounting APIs: */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include - -#ifndef cputime_to_nsecs -# define cputime_to_nsecs(__ct) \ - (cputime_to_usecs(__ct) * NSEC_PER_USEC) -#endif -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime); -- cgit v1.2.3 From a73bf9f2d969cbb04d5ca778f2a224060cda1027 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 22 Dec 2022 21:49:16 -0800 Subject: bpf: reorganize struct bpf_reg_state fields Move id and ref_obj_id fields after scalar data section (var_off and ranges). This is necessary to simplify next patch which will change regsafe()'s logic to be safer, as it makes the contents that has to be an exact match (type-specific parts, off, type, and var_off+ranges) a single sequential block of memory, while id and ref_obj_id should always be remapped and thus can't be memcp()'ed. There are few places that assume that var_off is after id/ref_obj_id to clear out id/ref_obj_id with the single memset(0). These are changed to explicitly zero-out id/ref_obj_id fields. Other places are adjusted to preserve exact byte-by-byte comparison behavior. No functional changes. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221223054921.958283-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 53d175cbaa02..127058cfec47 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -92,6 +92,26 @@ struct bpf_reg_state { u32 subprogno; /* for PTR_TO_FUNC */ }; + /* For scalar types (SCALAR_VALUE), this represents our knowledge of + * the actual value. + * For pointer types, this represents the variable part of the offset + * from the pointed-to object, and is shared with all bpf_reg_states + * with the same id as us. + */ + struct tnum var_off; + /* Used to determine if any memory access using this register will + * result in a bad access. + * These refer to the same value as var_off, not necessarily the actual + * contents of the register. + */ + s64 smin_value; /* minimum possible (s64)value */ + s64 smax_value; /* maximum possible (s64)value */ + u64 umin_value; /* minimum possible (u64)value */ + u64 umax_value; /* maximum possible (u64)value */ + s32 s32_min_value; /* minimum possible (s32)value */ + s32 s32_max_value; /* maximum possible (s32)value */ + u32 u32_min_value; /* minimum possible (u32)value */ + u32 u32_max_value; /* maximum possible (u32)value */ /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we @@ -144,26 +164,6 @@ struct bpf_reg_state { * allowed and has the same effect as bpf_sk_release(sk). */ u32 ref_obj_id; - /* For scalar types (SCALAR_VALUE), this represents our knowledge of - * the actual value. - * For pointer types, this represents the variable part of the offset - * from the pointed-to object, and is shared with all bpf_reg_states - * with the same id as us. - */ - struct tnum var_off; - /* Used to determine if any memory access using this register will - * result in a bad access. - * These refer to the same value as var_off, not necessarily the actual - * contents of the register. - */ - s64 smin_value; /* minimum possible (s64)value */ - s64 smax_value; /* maximum possible (s64)value */ - u64 umin_value; /* minimum possible (u64)value */ - u64 umax_value; /* maximum possible (u64)value */ - s32 s32_min_value; /* minimum possible (s32)value */ - s32 s32_max_value; /* maximum possible (s32)value */ - u32 u32_min_value; /* minimum possible (u32)value */ - u32 u32_max_value; /* maximum possible (u32)value */ /* parentage chain for liveness checking */ struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like -- cgit v1.2.3 From e873d4329ccb891bf3b17f1e0d44396de943e92d Mon Sep 17 00:00:00 2001 From: Joy Zou Date: Tue, 15 Nov 2022 17:38:23 +0800 Subject: dmaengine: imx-sdma: support hdmi in sdma The hdmi script already supported in sdma firmware. So add support hdmi in sdma driver. The design of hdmi script is different from common script such as sai. There is no need to config buffer descriptor for HDMI. The cyclic capability is achieved by the hdmi script. The slave config is so simple, only config src_addr, dts_addr and direction DMA_TRANS_NONE. Signed-off-by: Joy Zou Reviewed-by: Sascha Hauer Link: https://lore.kernel.org/r/20221115093823.2879128-3-joy.zou@nxp.com Signed-off-by: Vinod Koul --- include/linux/dma/imx-dma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma/imx-dma.h b/include/linux/dma/imx-dma.h index f487a4fa103a..cfec5f946e23 100644 --- a/include/linux/dma/imx-dma.h +++ b/include/linux/dma/imx-dma.h @@ -40,6 +40,7 @@ enum sdma_peripheral_type { IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ IMX_DMATYPE_SAI, /* SAI */ IMX_DMATYPE_MULTI_SAI, /* MULTI FIFOs For Audio */ + IMX_DMATYPE_HDMI, /* HDMI Audio */ }; enum imx_dma_prio { -- cgit v1.2.3 From 08f01cc1d6e240092a6d9bfa21652622657f38f0 Mon Sep 17 00:00:00 2001 From: Gerald Loacker Date: Thu, 1 Dec 2022 08:22:18 +0100 Subject: iio: add struct declaration for iio types Add struct for iio type arrays such as IIO_AVAIL_LIST which can be used instead of int arrays. Signed-off-by: Gerald Loacker Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221201072220.402585-2-gerald.loacker@wolfvision.net Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 8e0afaaa3f75..81413cd3a3e7 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -381,6 +381,11 @@ s64 iio_get_time_ns(const struct iio_dev *indio_dev); #define INDIO_MAX_RAW_ELEMENTS 4 +struct iio_val_int_plus_micro { + int integer; + int micro; +}; + struct iio_trigger; /* forward declaration */ /** -- cgit v1.2.3 From 1f5e408f6a000be980872b8065e547e2dbef6acc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Dec 2022 21:03:44 +0200 Subject: iio: light: tsl2563: Drop legacy platform data code There is no in-kernel user for legacy platform data. Otherwise, a new one can use software nodes instead. Hence, drop legacy platform data code. Signed-off-by: Andy Shevchenko Tested-by: Ferry Toth Link: https://lore.kernel.org/r/20221207190348.9347-7-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/tsl2563.h | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 include/linux/platform_data/tsl2563.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tsl2563.h b/include/linux/platform_data/tsl2563.h deleted file mode 100644 index 9cf9309c3f24..000000000000 --- a/include/linux/platform_data/tsl2563.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_TSL2563_H -#define __LINUX_TSL2563_H - -struct tsl2563_platform_data { - int cover_comp_gain; -}; - -#endif /* __LINUX_TSL2563_H */ -- cgit v1.2.3 From c941b98781b348bd63fb8f8b5307b10fde2af0c1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 9 Dec 2022 18:48:39 +0200 Subject: clk: qcom: smd-rpm: remove duplication between MMXI and MMAXI defines The commit 644c42295592 ("clk: qcom: smd: Add SM6375 clocks") added a duplicate of the existing define QCOM_SMD_RPM_MMAXI_CLK, drop it now. Fixes: 644c42295592 ("clk: qcom: smd: Add SM6375 clocks") Reviewed-by: Alex Elder Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221209164855.128798-4-dmitry.baryshkov@linaro.org --- include/linux/soc/qcom/smd-rpm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 62de54992e49..2990f425fdef 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -43,7 +43,6 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768 #define QCOM_SMD_RPM_PKA_CLK 0x616b70 #define QCOM_SMD_RPM_MCFG_CLK 0x6766636d -#define QCOM_SMD_RPM_MMXI_CLK 0x69786d6d int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, -- cgit v1.2.3 From 30465003ad776a922c32b2dac58db14f120f037e Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 17 Dec 2022 00:24:57 -0800 Subject: bpf: rename list_head -> graph_root in field info types Many of the structs recently added to track field info for linked-list head are useful as-is for rbtree root. So let's do a mechanical renaming of list_head-related types and fields: include/linux/bpf.h: struct btf_field_list_head -> struct btf_field_graph_root list_head -> graph_root in struct btf_field union kernel/bpf/btf.c: list_head -> graph_root in struct btf_field_info This is a nonfunctional change, functionality to actually use these fields for rbtree will be added in further patches. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20221217082506.1570898-5-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5fec2d1be6d7..1697bd87fc06 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -189,7 +189,7 @@ struct btf_field_kptr { u32 btf_id; }; -struct btf_field_list_head { +struct btf_field_graph_root { struct btf *btf; u32 value_btf_id; u32 node_offset; @@ -201,7 +201,7 @@ struct btf_field { enum btf_field_type type; union { struct btf_field_kptr kptr; - struct btf_field_list_head list_head; + struct btf_field_graph_root graph_root; }; }; -- cgit v1.2.3 From 26c48aea147cbf3dcec1df67d0684457ddf00fea Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 29 Dec 2022 18:38:35 +0800 Subject: spi: altera: switch to use modern name Change legacy name master/slave to modern name host/target or controller. No functional changed. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221229103837.4192759-2-yangyingliang@huawei.com Signed-off-by: Mark Brown --- include/linux/spi/altera.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/altera.h b/include/linux/spi/altera.h index 2e2a622e56da..3b74c3750caf 100644 --- a/include/linux/spi/altera.h +++ b/include/linux/spi/altera.h @@ -14,7 +14,7 @@ /** * struct altera_spi_platform_data - Platform data of the Altera SPI driver - * @mode_bits: Mode bits of SPI master. + * @mode_bits: Mode bits of SPI host. * @num_chipselect: Number of chipselects. * @bits_per_word_mask: bitmask of supported bits_per_word for transfers. * @num_devices: Number of devices that shall be added when the driver @@ -46,5 +46,5 @@ struct altera_spi { }; extern irqreturn_t altera_spi_irq(int irq, void *dev); -extern void altera_spi_init_master(struct spi_master *master); +extern void altera_spi_init_host(struct spi_controller *host); #endif /* __LINUX_SPI_ALTERA_H */ -- cgit v1.2.3 From 63a1bd8ad1ac9e4e8bfcd5914c8899606e04898d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:08:53 +0000 Subject: KVM: Drop arch hardware (un)setup hooks Drop kvm_arch_hardware_setup() and kvm_arch_hardware_unsetup() now that all implementations are nops. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Eric Farman # s390 Acked-by: Anup Patel Message-Id: <20221130230934.1014142-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4f26b244f6d0..7c1009c0e66d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1447,8 +1447,6 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_hardware_setup(void *opaque); -void kvm_arch_hardware_unsetup(void); int kvm_arch_check_processor_compat(void *opaque); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 466d27e48d7cd2542710309817cff1e91cd2e10a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Nov 2022 23:09:00 +0000 Subject: KVM: arm64: Simplify the CPUHP logic For a number of historical reasons, the KVM/arm64 hotplug setup is pretty complicated, and we have two extra CPUHP notifiers for vGIC and timers. It looks pretty pointless, and gets in the way of further changes. So let's just expose some helpers that can be called from the core CPUHP callback, and get rid of everything else. This gives us the opportunity to drop a useless notifier entry, as well as tidy-up the timer enable/disable, which was a bit odd. Signed-off-by: Marc Zyngier Signed-off-by: Isaku Yamahata Signed-off-by: Sean Christopherson Message-Id: <20221130230934.1014142-17-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/cpuhotplug.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..5cae6bd22f7f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -188,9 +188,6 @@ enum cpuhp_state { CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, CPUHP_AP_KVM_STARTING, - CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - CPUHP_AP_KVM_ARM_VGIC_STARTING, - CPUHP_AP_KVM_ARM_TIMER_STARTING, /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, -- cgit v1.2.3 From a578a0a9e3526483ad1904fac019d95e7089fb34 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:09:13 +0000 Subject: KVM: Drop kvm_arch_{init,exit}() hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop kvm_arch_init() and kvm_arch_exit() now that all implementations are nops. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Eric Farman # s390 Reviewed-by: Philippe Mathieu-Daudé Acked-by: Anup Patel Message-Id: <20221130230934.1014142-30-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7c1009c0e66d..62d977bb1448 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1423,9 +1423,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); -int kvm_arch_init(void *opaque); -void kvm_arch_exit(void); - void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -- cgit v1.2.3 From 81a1cf9f89a6b71e71bfd7d43837ce9235e70b38 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:09:16 +0000 Subject: KVM: Drop kvm_arch_check_processor_compat() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop kvm_arch_check_processor_compat() and its support code now that all architecture implementations are nops. Signed-off-by: Sean Christopherson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Eric Farman # s390 Acked-by: Anup Patel Reviewed-by: Kai Huang Message-Id: <20221130230934.1014142-33-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 62d977bb1448..20e207e4c6c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -956,8 +956,7 @@ static inline void kvm_irqfd_exit(void) { } #endif -int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - struct module *module); +int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); @@ -1444,7 +1443,6 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_check_processor_compat(void *opaque); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From aaf12a7b4323eb7d94677bcefc286ff6b772ed1c Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Wed, 30 Nov 2022 23:09:25 +0000 Subject: KVM: Rename and move CPUHP_AP_KVM_STARTING to ONLINE section The CPU STARTING section doesn't allow callbacks to fail. Move KVM's hotplug callback to ONLINE section so that it can abort onlining a CPU in certain cases to avoid potentially breaking VMs running on existing CPUs. For example, when KVM fails to enable hardware virtualization on the hotplugged CPU. Place KVM's hotplug state before CPUHP_AP_SCHED_WAIT_EMPTY as it ensures when offlining a CPU, all user tasks and non-pinned kernel tasks have left the CPU, i.e. there cannot be a vCPU task around. So, it is safe for KVM's CPU offline callback to disable hardware virtualization at that point. Likewise, KVM's online callback can enable hardware virtualization before any vCPU task gets a chance to run on hotplugged CPUs. Drop kvm_x86_check_processor_compatibility()'s WARN that IRQs are disabled, as the ONLINE section runs with IRQs disabled. The WARN wasn't intended to be a requirement, e.g. disabling preemption is sufficient, the IRQ thing was purely an aggressive sanity check since the helper was only ever invoked via SMP function call. Rename KVM's CPU hotplug callbacks accordingly. Suggested-by: Thomas Gleixner Signed-off-by: Chao Gao Signed-off-by: Isaku Yamahata Reviewed-by: Yuan Yao [sean: drop WARN that IRQs are disabled] Signed-off-by: Sean Christopherson Message-Id: <20221130230934.1014142-42-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5cae6bd22f7f..5b2f8147d1ae 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -187,7 +187,6 @@ enum cpuhp_state { CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, - CPUHP_AP_KVM_STARTING, /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, @@ -202,6 +201,7 @@ enum cpuhp_state { /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, -- cgit v1.2.3 From 441f7bfa99fe2b8a7e504aa72047e20579e88a5d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:09:33 +0000 Subject: KVM: Opt out of generic hardware enabling on s390 and PPC Allow architectures to opt out of the generic hardware enabling logic, and opt out on both s390 and PPC, which don't need to manually enable virtualization as it's always on (when available). In addition to letting s390 and PPC drop a bit of dead code, this will hopefully also allow ARM to clean up its related code, e.g. ARM has its own per-CPU flag to track which CPUs have enable hardware due to the need to keep hardware enabled indefinitely when pKVM is enabled. Signed-off-by: Sean Christopherson Acked-by: Anup Patel Message-Id: <20221130230934.1014142-50-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 20e207e4c6c1..109b18e2789c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1441,8 +1441,10 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); +#endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); @@ -2074,7 +2076,9 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool kvm_rebooting; +#endif extern unsigned int halt_poll_ns; extern unsigned int halt_poll_ns_grow; -- cgit v1.2.3 From 443a0a0f0cf4f432c7af6654b7f2f920d411d379 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Dec 2022 14:42:33 +0200 Subject: pinctrl: Introduce struct pinfunction and PINCTRL_PINFUNCTION() macro There are many pin control drivers define their own data type for pin function representation which is the same or embed the same data as newly introduced one. Provide the data type and convenient macro for all pin control drivers. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Acked-by: Mika Westerberg --- include/linux/pinctrl/pinctrl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index a0d39b303431..4d252ea00ed1 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -206,6 +206,26 @@ extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, const char *pin_group, const unsigned **pins, unsigned *num_pins); +/** + * struct pinfunction - Description about a function + * @name: Name of the function + * @groups: An array of groups for this function + * @ngroups: Number of groups in @groups + */ +struct pinfunction { + const char *name; + const char * const *groups; + size_t ngroups; +}; + +/* Convenience macro to define a single named pinfunction */ +#define PINCTRL_PINFUNCTION(_name, _groups, _ngroups) \ +(struct pinfunction) { \ + .name = (_name), \ + .groups = (_groups), \ + .ngroups = (_ngroups), \ + } + #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_PINCTRL) extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np); #else -- cgit v1.2.3 From a3c1f066e1c514ac819f5dae288cc8a59c384158 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 19 Dec 2022 23:46:15 +0800 Subject: thermal/intel: Introduce Intel TCC library There are several different drivers that accesses the Intel TCC (thermal control circuitry) MSRs, and each of them has its own implementation for the same functionalities, e.g. getting the current temperature, getting the tj_max, and getting/setting the tj_max offset. Introduce a library to unify the code for Intel CPU TCC MSR access. At the same time, ensure the temperature is got based on the updated tjmax value because tjmax can be changed at runtime for cases like the Intel SST-PP (Intel Speed Select Technology - Performance Profile) level change. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- include/linux/intel_tcc.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/intel_tcc.h (limited to 'include/linux') diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h new file mode 100644 index 000000000000..f422612c28d6 --- /dev/null +++ b/include/linux/intel_tcc.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * header for Intel TCC (thermal control circuitry) library + * + * Copyright (C) 2022 Intel Corporation. + */ + +#ifndef __INTEL_TCC_H__ +#define __INTEL_TCC_H__ + +#include + +int intel_tcc_get_tjmax(int cpu); +int intel_tcc_get_offset(int cpu); +int intel_tcc_set_offset(int cpu, int offset); +int intel_tcc_get_temp(int cpu, bool pkg); + +#endif /* __INTEL_TCC_H__ */ -- cgit v1.2.3 From a6528a960b78715d4c3d2c9cda85714b15a0faa4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Dec 2022 14:43:01 -0800 Subject: fsverity: optimize fsverity_file_open() on non-verity files Make fsverity_file_open() an inline function that does the IS_VERITY() check, then (if needed) calls __fsverity_file_open() to do the real work. This reduces the overhead on non-verity files. Signed-off-by: Eric Biggers Acked-by: Dave Chinner Link: https://lore.kernel.org/r/20221214224304.145712-2-ebiggers@kernel.org --- include/linux/fsverity.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 40f14e5fed9d..326bf2e2b903 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -148,7 +148,7 @@ int fsverity_get_digest(struct inode *inode, /* open.c */ -int fsverity_file_open(struct inode *inode, struct file *filp); +int __fsverity_file_open(struct inode *inode, struct file *filp); int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); void fsverity_cleanup_inode(struct inode *inode); @@ -193,9 +193,9 @@ static inline int fsverity_get_digest(struct inode *inode, /* open.c */ -static inline int fsverity_file_open(struct inode *inode, struct file *filp) +static inline int __fsverity_file_open(struct inode *inode, struct file *filp) { - return IS_VERITY(inode) ? -EOPNOTSUPP : 0; + return -EOPNOTSUPP; } static inline int fsverity_prepare_setattr(struct dentry *dentry, @@ -254,4 +254,24 @@ static inline bool fsverity_active(const struct inode *inode) return fsverity_get_info(inode) != NULL; } +/** + * fsverity_file_open() - prepare to open a verity file + * @inode: the inode being opened + * @filp: the struct file being set up + * + * When opening a verity file, deny the open if it is for writing. Otherwise, + * set up the inode's ->i_verity_info if not already done. + * + * When combined with fscrypt, this must be called after fscrypt_file_open(). + * Otherwise, we won't have the key set up to decrypt the verity metadata. + * + * Return: 0 on success, -errno on failure + */ +static inline int fsverity_file_open(struct inode *inode, struct file *filp) +{ + if (IS_VERITY(inode)) + return __fsverity_file_open(inode, filp); + return 0; +} + #endif /* _LINUX_FSVERITY_H */ -- cgit v1.2.3 From 01d90c07a592b532c7a673dfd8baa6d6e496273d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Dec 2022 14:43:02 -0800 Subject: fsverity: optimize fsverity_prepare_setattr() on non-verity files Make fsverity_prepare_setattr() an inline function that does the IS_VERITY() check, then (if needed) calls __fsverity_prepare_setattr() to do the real work. This reduces the overhead on non-verity files. Signed-off-by: Eric Biggers Acked-by: Dave Chinner Link: https://lore.kernel.org/r/20221214224304.145712-3-ebiggers@kernel.org --- include/linux/fsverity.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 326bf2e2b903..84b498fff7ec 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -149,7 +149,7 @@ int fsverity_get_digest(struct inode *inode, /* open.c */ int __fsverity_file_open(struct inode *inode, struct file *filp); -int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); +int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); void fsverity_cleanup_inode(struct inode *inode); /* read_metadata.c */ @@ -198,10 +198,10 @@ static inline int __fsverity_file_open(struct inode *inode, struct file *filp) return -EOPNOTSUPP; } -static inline int fsverity_prepare_setattr(struct dentry *dentry, - struct iattr *attr) +static inline int __fsverity_prepare_setattr(struct dentry *dentry, + struct iattr *attr) { - return IS_VERITY(d_inode(dentry)) ? -EOPNOTSUPP : 0; + return -EOPNOTSUPP; } static inline void fsverity_cleanup_inode(struct inode *inode) @@ -274,4 +274,22 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp) return 0; } +/** + * fsverity_prepare_setattr() - prepare to change a verity inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Verity files are immutable, so deny truncates. This isn't covered by the + * open-time check because sys_truncate() takes a path, not a file descriptor. + * + * Return: 0 on success, -errno on failure + */ +static inline int fsverity_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + if (IS_VERITY(d_inode(dentry))) + return __fsverity_prepare_setattr(dentry, attr); + return 0; +} + #endif /* _LINUX_FSVERITY_H */ -- cgit v1.2.3 From 9642946c6c851ba954689f184b3370e3594b6b1a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Dec 2022 14:43:03 -0800 Subject: fsverity: optimize fsverity_cleanup_inode() on non-verity files Make fsverity_cleanup_inode() an inline function that checks for non-NULL ->i_verity_info, then (if needed) calls __fsverity_cleanup_inode() to do the real work. This reduces the overhead on non-verity files. Signed-off-by: Eric Biggers Acked-by: Dave Chinner Link: https://lore.kernel.org/r/20221214224304.145712-4-ebiggers@kernel.org --- include/linux/fsverity.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 84b498fff7ec..203f4962c54a 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -150,7 +150,19 @@ int fsverity_get_digest(struct inode *inode, int __fsverity_file_open(struct inode *inode, struct file *filp); int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); -void fsverity_cleanup_inode(struct inode *inode); +void __fsverity_cleanup_inode(struct inode *inode); + +/** + * fsverity_cleanup_inode() - free the inode's verity info, if present + * @inode: an inode being evicted + * + * Filesystems must call this on inode eviction to free ->i_verity_info. + */ +static inline void fsverity_cleanup_inode(struct inode *inode) +{ + if (inode->i_verity_info) + __fsverity_cleanup_inode(inode); +} /* read_metadata.c */ -- cgit v1.2.3 From 72ea15f0ddd29b9facdab836a2f5d3e28df9b202 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Dec 2022 14:43:04 -0800 Subject: fsverity: pass pos and size to ->write_merkle_tree_block fsverity_operations::write_merkle_tree_block is passed the index of the block to write and the log base 2 of the block size. However, all implementations of it use these parameters only to calculate the position and the size of the block, in bytes. Therefore, make ->write_merkle_tree_block take 'pos' and 'size' parameters instead of 'index' and 'log_blocksize'. Suggested-by: Dave Chinner Signed-off-by: Eric Biggers Acked-by: Dave Chinner Link: https://lore.kernel.org/r/20221214224304.145712-5-ebiggers@kernel.org --- include/linux/fsverity.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 203f4962c54a..f5ed7ecfd9ab 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -109,9 +109,9 @@ struct fsverity_operations { * Write a Merkle tree block to the given inode. * * @inode: the inode for which the Merkle tree is being built - * @buf: block to write - * @index: 0-based index of the block within the Merkle tree - * @log_blocksize: log base 2 of the Merkle tree block size + * @buf: the Merkle tree block to write + * @pos: the position of the block in the Merkle tree (in bytes) + * @size: the Merkle tree block size (in bytes) * * This is only called between ->begin_enable_verity() and * ->end_enable_verity(). @@ -119,7 +119,7 @@ struct fsverity_operations { * Return: 0 on success, -errno on failure */ int (*write_merkle_tree_block)(struct inode *inode, const void *buf, - u64 index, int log_blocksize); + u64 pos, unsigned int size); }; #ifdef CONFIG_FS_VERITY -- cgit v1.2.3 From d19ab1f785d0b6b9f709799f0938658903821ba1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Dec 2022 15:13:34 +0100 Subject: mtd: cfi: allow building spi-intel standalone When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver fails to build, as it includes the shared CFI header: include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp] 62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. linux/mtd/spi-nor.h does not actually need to include cfi.h, so remove the inclusion here to fix the warning. This uncovers a missing #include in spi-nor/core.c so add that there to prevent a different build issue. Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM") Signed-off-by: Arnd Bergmann Reviewed-by: Mika Westerberg Reviewed-by: Tokunori Ikegami Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org --- include/linux/mtd/spi-nor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 25765556223a..a3f8cdca90c8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include -#include #include #include -- cgit v1.2.3 From ed3557c947e1d4164d370cc2d69dd7eb92706f0a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:39 +0100 Subject: ieee802154: Add support for user scanning requests The ieee802154 layer should be able to scan a set of channels in order to look for beacons advertizing PANs. Supporting this involves adding two user commands: triggering scans and aborting scans. The user should also be notified when a new beacon is received and also upon scan termination. A scan request structure is created to list the requirements and to be accessed asynchronously when changing channels or receiving beacons. Mac layers may now implement the ->trigger_scan() and ->abort_scan() hooks. Co-developed-by: David Girault Signed-off-by: David Girault Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-2-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/linux/ieee802154.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 0303eb84d596..b22e4147d334 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -44,6 +44,9 @@ #define IEEE802154_SHORT_ADDR_LEN 2 #define IEEE802154_PAN_ID_LEN 2 +/* Duration in superframe order */ +#define IEEE802154_MAX_SCAN_DURATION 14 +#define IEEE802154_ACTIVE_SCAN_DURATION 15 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 #define IEEE802154_MAX_SIFS_FRAME_SIZE 18 -- cgit v1.2.3 From 57588c71177f0bfc08509c2c3a9bfe32850c0786 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:44 +0100 Subject: mac802154: Handle passive scanning Implement the core hooks in order to provide the softMAC layer support for passive scans. Scans are requested by the user and can be aborted. Changing channels manually is prohibited during scans. The implementation uses a workqueue triggered at a certain interval depending on the symbol duration for the current channel and the duration order provided. More advanced drivers with internal scheduling capabilities might require additional care but there is none mainline yet. Received beacons during a passive scan are processed in a work queue and their result forwarded to the upper layer. Active scanning is not supported yet. Co-developed-by: David Girault Signed-off-by: David Girault Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-7-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/linux/ieee802154.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index b22e4147d334..140f61ec0f5f 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -47,6 +47,10 @@ /* Duration in superframe order */ #define IEEE802154_MAX_SCAN_DURATION 14 #define IEEE802154_ACTIVE_SCAN_DURATION 15 +/* Superframe duration in slots */ +#define IEEE802154_SUPERFRAME_PERIOD 16 +/* Various periods expressed in symbols */ +#define IEEE802154_SLOT_PERIOD 60 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 #define IEEE802154_MAX_SIFS_FRAME_SIZE 18 -- cgit v1.2.3 From c004d231cac709f09987f2a6dd733013039d27c3 Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Wed, 19 Oct 2022 08:36:50 -0400 Subject: rcu: Use hlist_nulls_next_rcu() in hlist_nulls_add_tail_rcu() In commit 8dbd76e79a16 ("tcp/dccp: fix possible race __inet_lookup_established()"), function hlist_nulls_add_tail_rcu() was added back, but the local variable *last* is of type hlist_nulls_node, so use hlist_nulls_next_rcu() instead of hlist_next_rcu(). Signed-off-by: Zhao Mengmeng Signed-off-by: Paul E. McKenney --- include/linux/rculist_nulls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index d8afdb8784c1..ba4c00dd8005 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -139,7 +139,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, if (last) { n->next = last->next; n->pprev = &last->next; - rcu_assign_pointer(hlist_next_rcu(last), n); + rcu_assign_pointer(hlist_nulls_next_rcu(last), n); } else { hlist_nulls_add_head_rcu(n, h); } -- cgit v1.2.3 From 0cae5ded535c3a80aed94f119bbd4ee3ae284a65 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Dec 2022 11:41:44 -0800 Subject: rcu: Make RCU_LOCKDEP_WARN() avoid early lockdep checks Currently, RCU_LOCKDEP_WARN() checks the condition before checking to see if lockdep is still enabled. This is necessary to avoid the false-positive splats fixed by commit 3066820034b5dd ("rcu: Reject RCU_LOCKDEP_WARN() false positives"). However, the current state can result in false-positive splats during early boot before lockdep is fully initialized. This commit therefore checks debug_lockdep_rcu_enabled() both before and after checking the condition, thus avoiding both sets of false-positive error reports. Reported-by: Steven Rostedt Reported-by: Masami Hiramatsu (Google) Reported-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Reviewed-by: Mathieu Desnoyers Cc: Boqun Feng Cc: Matthew Wilcox Cc: Thomas Gleixner --- include/linux/rcupdate.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03abf883a281..aa86de01aab6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -374,11 +374,18 @@ static inline int debug_lockdep_rcu_enabled(void) * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check * @s: informative message + * + * This checks debug_lockdep_rcu_enabled() before checking (c) to + * prevent early boot splats due to lockdep not yet being initialized, + * and rechecks it after checking (c) to prevent false-positive splats + * due to races with lockdep being disabled. See commit 3066820034b5dd + * ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail. */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ static bool __section(".data.unlikely") __warned; \ - if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \ + if (debug_lockdep_rcu_enabled() && (c) && \ + debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ -- cgit v1.2.3 From 04a522b7da3dbc083f8ae0aa1a6184b959a8f81c Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 25 Oct 2022 16:46:12 +0200 Subject: rcu: Refactor kvfree_call_rcu() and high-level helpers Currently a kvfree_call_rcu() takes an offset within a structure as a second parameter, so a helper such as a kvfree_rcu_arg_2() has to convert rcu_head and a freed ptr to an offset in order to pass it. That leads to an extra conversion on macro entry. Instead of converting, refactor the code in way that a pointer that has to be freed is passed directly to the kvfree_call_rcu(). This patch does not make any functional change and is transparent to all kvfree_rcu() users. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 5 ++--- include/linux/rcutiny.h | 12 ++++++------ include/linux/rcutree.h | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03abf883a281..f38d4469d7f3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1011,8 +1011,7 @@ do { \ \ if (___p) { \ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \ - kvfree_call_rcu(&((___p)->rhf), (rcu_callback_t)(unsigned long) \ - (offsetof(typeof(*(ptr)), rhf))); \ + kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ } \ } while (0) @@ -1021,7 +1020,7 @@ do { \ typeof(ptr) ___p = (ptr); \ \ if (___p) \ - kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \ + kvfree_call_rcu(NULL, (void *) (___p)); \ } while (0) /* diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 68f9070aa111..7f17acf29dda 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -98,25 +98,25 @@ static inline void synchronize_rcu_expedited(void) */ extern void kvfree(const void *addr); -static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) { if (head) { - call_rcu(head, func); + call_rcu(head, (rcu_callback_t) ((void *) head - ptr)); return; } // kvfree_rcu(one_arg) call. might_sleep(); synchronize_rcu(); - kvfree((void *) func); + kvfree(ptr); } #ifdef CONFIG_KASAN_GENERIC -void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +void kvfree_call_rcu(struct rcu_head *head, void *ptr); #else -static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr) { - __kvfree_call_rcu(head, func); + __kvfree_call_rcu(head, ptr); } #endif diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 4003bf6cfa1c..56bccb5a8fde 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(void) } void synchronize_rcu_expedited(void); -void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); -- cgit v1.2.3 From aa5210f524edcebf909e6576b515bc3e2d82af0d Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Wed, 16 Nov 2022 09:52:43 +0800 Subject: srcu: Fix a misspelling in comment s/srcu_gq_seq/srcu_gp_seq/ Signed-off-by: Pingfan Liu Cc: Lai Jiangshan Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Reviewed-by: Mukesh Ojha Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index c689a81752c9..558057b517b7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -49,7 +49,7 @@ struct srcu_data { struct srcu_node { spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ - /* if greater than ->srcu_gq_seq. */ + /* if greater than ->srcu_gp_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ struct srcu_node *srcu_parent; /* Next up in tree. */ -- cgit v1.2.3 From 0b1182bde303dc476ea9712c2c816be2e4f0cf81 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Nov 2022 15:49:55 -0800 Subject: rcu: Add srcu_down_read() and srcu_up_read() A pair of matching srcu_read_lock() and srcu_read_unlock() invocations must take place within the same context, for example, within the same task. Otherwise, lockdep complains, as is the right thing to do for most use cases. However, there are use cases involving asynchronous I/O where the SRCU reader needs to begin on one task and end on another. This commit therefore supplies the semaphore-like srcu_down_read() and srcu_up_read(), which act like srcu_read_lock() and srcu_read_unlock(), but permitting srcu_up_read() to be invoked in a different context than was the matching srcu_down_read(). Neither srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. Reported-by: Jan Kara Signed-off-by: Paul E. McKenney Tested-by: Amir Goldstein --- include/linux/srcu.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9b9d0bbf1d3c..74796cd7e7a9 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -214,6 +214,34 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) return retval; } +/** + * srcu_down_read - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter a semaphore-like SRCU read-side critical section. Note that + * SRCU read-side critical sections may be nested. However, it is + * illegal to call anything that waits on an SRCU grace period for the + * same srcu_struct, whether directly or indirectly. Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). But if you want lockdep to help you + * keep this stuff straight, you should instead use srcu_read_lock(). + * + * The semaphore-like nature of srcu_down_read() means that the matching + * srcu_up_read() can be invoked from some other context, for example, + * from some other task or from an irq handler. However, neither + * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. + * + * Calls to srcu_down_read() may be nested, similar to the manner in + * which calls to down_read() may be nested. + */ +static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) +{ + WARN_ON_ONCE(in_nmi()); + srcu_check_nmi_safety(ssp, false); + return __srcu_read_lock(ssp); +} + /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. @@ -254,6 +282,23 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) __srcu_read_unlock(ssp, idx); } +/** + * srcu_up_read - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit an SRCU read-side critical section, but not necessarily from + * the same context as the maching srcu_down_read(). + */ +static inline void srcu_up_read(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + WARN_ON_ONCE(in_nmi()); + srcu_check_nmi_safety(ssp, false); + __srcu_read_unlock(ssp, idx); +} + /** * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock * -- cgit v1.2.3 From 47904aed898a08f028572b9b5a5cc101ddfb2d82 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Nov 2022 17:25:03 +0800 Subject: genirq: Fix the return type of kstat_cpu_irqs_sum() The type of member ->irqs_sum is unsigned long, but kstat_cpu_irqs_sum() returns int, which can result in truncation. Therefore, change the kstat_cpu_irqs_sum() function's return value to unsigned long to avoid truncation. Fixes: f2c66cd8eedd ("/proc/stat: scalability of irq num per cpu") Reported-by: Elliott, Robert (Servers) Signed-off-by: Zhen Lei Cc: Tejun Heo Cc: "Peter Zijlstra (Intel)" Cc: Josh Don Cc: Andrew Morton Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ddb5a358fd82..90e2fdc17d79 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -75,7 +75,7 @@ extern unsigned int kstat_irqs_usr(unsigned int irq); /* * Number of interrupts per cpu, since bootup */ -static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) +static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu) { return kstat_cpu(cpu).irqs_sum; } -- cgit v1.2.3 From 28319d6dc5e2ffefa452c2377dd0f71621b5bff0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 25 Nov 2022 14:55:00 +0100 Subject: rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes() RCU Tasks and PID-namespace unshare can interact in do_exit() in a complicated circular dependency: 1) TASK A calls unshare(CLONE_NEWPID), this creates a new PID namespace that every subsequent child of TASK A will belong to. But TASK A doesn't itself belong to that new PID namespace. 2) TASK A forks() and creates TASK B. TASK A stays attached to its PID namespace (let's say PID_NS1) and TASK B is the first task belonging to the new PID namespace created by unshare() (let's call it PID_NS2). 3) Since TASK B is the first task attached to PID_NS2, it becomes the PID_NS2 child reaper. 4) TASK A forks() again and creates TASK C which get attached to PID_NS2. Note how TASK C has TASK A as a parent (belonging to PID_NS1) but has TASK B (belonging to PID_NS2) as a pid_namespace child_reaper. 5) TASK B exits and since it is the child reaper for PID_NS2, it has to kill all other tasks attached to PID_NS2, and wait for all of them to die before getting reaped itself (zap_pid_ns_process()). 6) TASK A calls synchronize_rcu_tasks() which leads to synchronize_srcu(&tasks_rcu_exit_srcu). 7) TASK B is waiting for TASK C to get reaped. But TASK B is under a tasks_rcu_exit_srcu SRCU critical section (exit_notify() is between exit_tasks_rcu_start() and exit_tasks_rcu_finish()), blocking TASK A. 8) TASK C exits and since TASK A is its parent, it waits for it to reap TASK C, but it can't because TASK A waits for TASK B that waits for TASK C. Pid_namespace semantics can hardly be changed at this point. But the coverage of tasks_rcu_exit_srcu can be reduced instead. The current task is assumed not to be concurrently reapable at this stage of exit_notify() and therefore tasks_rcu_exit_srcu can be temporarily relaxed without breaking its constraints, providing a way out of the deadlock scenario. [ paulmck: Fix build failure by adding additional declaration. ] Fixes: 3f95aa81d265 ("rcu: Make TASKS_RCU handle tasks that are almost done exiting") Reported-by: Pengfei Xu Suggested-by: Boqun Feng Suggested-by: Neeraj Upadhyay Suggested-by: Paul E. McKenney Cc: Oleg Nesterov Cc: Lai Jiangshan Cc: Eric W . Biederman Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03abf883a281..c0c79beac3fe 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -238,6 +238,7 @@ void synchronize_rcu_tasks_rude(void); #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); +void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) @@ -246,6 +247,7 @@ void exit_tasks_rcu_finish(void); #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } +static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ -- cgit v1.2.3 From 876293121f24fc1a7df85450d0997f54540c8979 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 29 Dec 2022 17:59:57 +0100 Subject: ata: scsi: rename flag ATA_QCFLAG_FAILED to ATA_QCFLAG_EH The name ATA_QCFLAG_FAILED is misleading since it does not mean that a QC completed in error, or that it didn't complete at all. It means that libata decided to schedule EH for the QC, so the QC is now owned by the libata error handler (EH). The normal execution path is responsible for not accessing a QC owned by EH. libata core enforces the rule by returning NULL from ata_qc_from_tag() for QCs owned by EH. It is quite easy to mistake that a QC marked with ATA_QCFLAG_FAILED was an error. However, a QC that was actually an error is instead indicated by having qc->err_mask set. E.g. when we have a NCQ error, we abort all QCs, which currently will mark all QCs as ATA_QCFLAG_FAILED. However, it will only be a single QC that is an error (i.e. has qc->err_mask set). Rename ATA_QCFLAG_FAILED to ATA_QCFLAG_EH to more clearly highlight that this flag simply means that a QC is now owned by EH. This new name will not mislead to think that the QC was an error (which is instead indicated by having qc->err_mask set). This also makes it more obvious that the EH code skips all QCs that do not have ATA_QCFLAG_EH set (rather than ATA_QCFLAG_FAILED), since the EH code should simply only care about QCs that are owned by EH itself. Signed-off-by: Niklas Cassel Reviewed-by: John Garry Signed-off-by: Damien Le Moal --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index c9149ebe7423..7985e6e2ae0e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -206,7 +206,7 @@ enum { ATA_QCFLAG_QUIET = (1 << 6), /* don't report device error */ ATA_QCFLAG_RETRY = (1 << 7), /* retry after failure */ - ATA_QCFLAG_FAILED = (1 << 16), /* cmd failed and is owned by EH */ + ATA_QCFLAG_EH = (1 << 16), /* cmd aborted and owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ ATA_QCFLAG_EH_SCHEDULED = (1 << 18), /* EH scheduled (obsolete) */ @@ -1756,7 +1756,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | - ATA_QCFLAG_FAILED)) == ATA_QCFLAG_ACTIVE) + ATA_QCFLAG_EH)) == ATA_QCFLAG_ACTIVE) return qc; return NULL; -- cgit v1.2.3 From 931139af5718fb41565fff2420daf995c016ec80 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 29 Dec 2022 17:59:58 +0100 Subject: ata: libata: simplify qc_fill_rtf port operation interface The boolean return value of the qc_fill_rtf operation is used nowhere. Simplify this operation interface by making it a void function. All drivers defining this operation are also updated. Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel Reviewed-by: John Garry --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7985e6e2ae0e..8483d8300ea3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -876,7 +876,7 @@ struct ata_port_operations { int (*check_atapi_dma)(struct ata_queued_cmd *qc); enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); - bool (*qc_fill_rtf)(struct ata_queued_cmd *qc); + void (*qc_fill_rtf)(struct ata_queued_cmd *qc); /* * Configuration and exception handling @@ -1936,7 +1936,7 @@ extern void ata_sff_queue_delayed_work(struct delayed_work *dwork, unsigned long delay); extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); -extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); +extern void ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern irqreturn_t ata_sff_interrupt(int irq, void *dev_instance); -- cgit v1.2.3 From 93c4aa449b88196c7d56a556cb6a2aad21ad8a7a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 29 Dec 2022 17:59:59 +0100 Subject: ata: libata: read the shared status for successful NCQ commands once Currently, the status is being read for each QC, inside ata_qc_complete(), which means that QCs being completed by ata_qc_complete_multiple() (i.e. multiple QCs completed during a single interrupt), can have different status and error bits set. This is because the FIS Receive Area will get updated as soon as the HBA receives a new FIS from the device in the NCQ case. Here is an example of the problem: ata14.00: ata_qc_complete_multiple: done_mask: 0x180000 qc tag: 19 cmd: 0x61 flags: 0x11b err_mask: 0x0 tf->status: 0x40 qc tag: 20 cmd: 0x61 flags: 0x11b err_mask: 0x0 tf->status: 0x43 A print in ata_qc_complete_multiple(), shows that done_mask is: 0x180000 which means that tag 19 and 20 were completed. Another print in ata_qc_complete(), after the call to fill_result_tf(), shows that tag 19 and 20 have different status values, even though they were completed in the same ata_qc_complete_multiple() call. If PMP is not enabled, simply read the status and error once, before calling ata_qc_complete() for each QC. Without PMP, we know that all QCs must share the same status and error values. If PMP is enabled, we also read the status before calling ata_qc_complete(), however, we still read the status for each QC, since the QCs can belong to different PMP links (which means that the QCs does not necessarily share the same status and error values). Do all this by introducing the new port operation .qc_ncq_fill_rtf. If set, this operation is called in ata_qc_complete_multiple() to set the result tf for all completed QCs signaled by the last SDB FIS received. QCs that have their result tf filled are marked with the new flag ATA_QCFLAG_RTF_FILLED so that any later execution of the qc_fill_rtf port operation does nothing (e.g. when called from ata_qc_complete()). Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8483d8300ea3..f54e02dadc6f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -200,6 +200,7 @@ enum { /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_DMAMAP = (1 << 1), /* SG table is DMA mapped */ + ATA_QCFLAG_RTF_FILLED = (1 << 2), /* result TF has been filled */ ATA_QCFLAG_IO = (1 << 3), /* standard IO command */ ATA_QCFLAG_RESULT_TF = (1 << 4), /* result TF requested */ ATA_QCFLAG_CLEAR_EXCL = (1 << 5), /* clear excl_link on completion */ @@ -877,6 +878,7 @@ struct ata_port_operations { enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); void (*qc_fill_rtf)(struct ata_queued_cmd *qc); + void (*qc_ncq_fill_rtf)(struct ata_port *ap, u64 done_mask); /* * Configuration and exception handling -- cgit v1.2.3 From 87aab3c4cd59aef42fe280fece2be8b8156b99e0 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 29 Dec 2022 18:00:01 +0100 Subject: ata: libata: move NCQ related ATA_DFLAGs ata_dev_configure() starts off by clearing all flags in ATA_DFLAG_CFG_MASK: dev->flags &= ~ATA_DFLAG_CFG_MASK; ata_dev_configure() then calls ata_dev_config_lba() which calls ata_dev_config_ncq(). ata_dev_config_ncq() will set the correct ATA_DFLAGs depending on what is actually supported. Since these flags are set by ata_dev_configure(), they should be in ATA_DFLAG_CFG_MASK and not in ATA_DFLAG_INIT_MASK. ATA_DFLAG_NCQ_PRIO_ENABLED is set via sysfs, is should therefore not be in ATA_DFLAG_CFG_MASK. It also cannot be in ATA_DFLAG_INIT_MASK, because ata_eh_schedule_probe() calls ata_dev_init(), which will clear all flags in ATA_DFLAG_INIT_MASK. This means that ATA_DFLAG_NCQ_PRIO_ENABLED (the value the user sets via sysfs) would get silently cleared if ata_eh_schedule_probe() is called. While that should only happen in certain circumstances, it still doesn't seem right that it can get silently cleared. (ata_dev_config_ncq_prio() will still clear the ATA_DFLAG_NCQ_PRIO_ENABLED flag if ATA_DFLAG_NCQ_PRIO is suddenly no longer supported after a revalidation.) Because of this, move ATA_DFLAG_NCQ_PRIO_ENABLED to be outside of both ATA_DFLAG_CFG_MASK and ATA_DFLAG_INIT_MASK. Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- include/linux/libata.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f54e02dadc6f..3b7f5d9e2f87 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -91,22 +91,21 @@ enum { ATA_DFLAG_AN = (1 << 7), /* AN configured */ ATA_DFLAG_TRUSTED = (1 << 8), /* device supports trusted send/recv */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ - ATA_DFLAG_CFG_MASK = (1 << 12) - 1, + ATA_DFLAG_NCQ_SEND_RECV = (1 << 11), /* device supports NCQ SEND and RECV */ + ATA_DFLAG_NCQ_PRIO = (1 << 12), /* device supports NCQ priority */ + ATA_DFLAG_CFG_MASK = (1 << 13) - 1, - ATA_DFLAG_PIO = (1 << 12), /* device limited to PIO mode */ - ATA_DFLAG_NCQ_OFF = (1 << 13), /* device limited to non-NCQ mode */ + ATA_DFLAG_PIO = (1 << 13), /* device limited to PIO mode */ + ATA_DFLAG_NCQ_OFF = (1 << 14), /* device limited to non-NCQ mode */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ - ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ - ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ - ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ - ATA_DFLAG_INIT_MASK = (1 << 24) - 1, + ATA_DFLAG_INIT_MASK = (1 << 19) - 1, + ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 19), /* Priority cmds sent to dev */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), - ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ -- cgit v1.2.3 From 33e3f0a3358b8f9bb54b2661b9c1d37a75664c79 Mon Sep 17 00:00:00 2001 From: Richard Clark Date: Tue, 13 Dec 2022 12:39:36 +0800 Subject: workqueue: Add a new flag to spot the potential UAF error Currently if the user queues a new work item unintentionally into a wq after the destroy_workqueue(wq), the work still can be queued and scheduled without any noticeable kernel message before the end of a RCU grace period. As a debug-aid facility, this commit adds a new flag __WQ_DESTROYING to spot that issue by triggering a kernel WARN message. Signed-off-by: Richard Clark Reviewed-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a0143dd24430..ac551b8ee7d9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -335,6 +335,7 @@ enum { */ WQ_POWER_EFFICIENT = 1 << 7, + __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ -- cgit v1.2.3 From a4b2e6063cfeaab1160501acfb27e8618a7c693f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 15 Dec 2022 16:20:23 +0100 Subject: firmware: xilinx: Clear IOCTL_SET_SD_TAPDELAY using PM_MMIO_WRITE In case the tap delay required by Arasan SDHCI is set to 0, the current embeddedsw firmware unconditionally writes IOU_SLCR SD_ITAPDLY to 0x100 (SD0_ITAPDLYENA=1, SD0_ITAPDLYSEL=0). Previous behavior was to keep the IOU_SLCR SD_ITAPDLY set to 0x0. There is some sort of difference in the behavior between SD0_ITAPDLYENA=1/0 with the same SD0_ITAPDLYSEL=0, even though the behavior should be identical -- zero delay added to rxclk_in line. The former breaks HS200 training in low temperature conditions. Write IOU_SLCR SD_ITAPDLY register to 0 using PM_MMIO_WRITE which seem to allow unrestricted WRITE access (and PM_MMIO_READ which allows read access) to the entire address space. This way, it is possible to work around the defect in IOCTL_SET_SD_TAPDELAY design which does not permit clearing SDx_ITAPDLYENA bit. Note that the embeddedsw firmware does not permit clearing the SD_ITAPDLY SD0_ITAPDLYENA bit, this bit can only ever be set by the firmware and it is often impossible to update the possibly broken firmware. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221215152023.8387-1-marex@denx.de Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index b986e267d149..eb88b4ba62f9 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -79,6 +79,10 @@ #define EVENT_ERROR_PSM_ERR1 (0x28108000U) #define EVENT_ERROR_PSM_ERR2 (0x2810C000U) +/* ZynqMP SD tap delay tuning */ +#define SD_ITAPDLY 0xFF180314 +#define SD_OTAPDLYSEL 0xFF180318 + enum pm_api_cb_id { PM_INIT_SUSPEND_CB = 30, PM_ACKNOWLEDGE_CB = 31, -- cgit v1.2.3 From 19e183b54528f11fafeca60fc6d0821e29ff281e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:50 +0000 Subject: elfcore: Add a cprm parameter to elf_core_extra_{phdrs,data_size} A subsequent fix for arm64 will use this parameter to parse the vma information from the snapshot created by dump_vma_snapshot() rather than traversing the vma list without the mmap_lock. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Cc: Eric Biederman Cc: Kees Cook Link: https://lore.kernel.org/r/20221222181251.1345752-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/linux/elfcore.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 9ec81290e3c8..bd5560542c79 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -105,14 +105,14 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu); * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the gate DSO was being used. */ -extern Elf_Half elf_core_extra_phdrs(void); +extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm); extern int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); -extern size_t elf_core_extra_data_size(void); +extern size_t elf_core_extra_data_size(struct coredump_params *cprm); #else -static inline Elf_Half elf_core_extra_phdrs(void) +static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return 0; } @@ -127,7 +127,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -static inline size_t elf_core_extra_data_size(void) +static inline size_t elf_core_extra_data_size(struct coredump_params *cprm) { return 0; } -- cgit v1.2.3 From 439a1bcac648fe9b59210cde8991fb2acf37bdab Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 19 Sep 2022 15:53:13 -0700 Subject: fortify: Use __builtin_dynamic_object_size() when available Since the commits starting with c37495d6254c ("slab: add __alloc_size attributes for better bounds checking"), the compilers have runtime allocation size hints available in some places. This was immediately available to CONFIG_UBSAN_BOUNDS, but CONFIG_FORTIFY_SOURCE needed updating to explicitly make use of the hints via the associated __builtin_dynamic_object_size() helper. Detect and use the builtin when it is available, increasing the accuracy of the mitigation. When runtime sizes are not available, __builtin_dynamic_object_size() falls back to __builtin_object_size(), leaving the existing bounds checking unchanged. Additionally update the VMALLOC_LINEAR_OVERFLOW LKDTM test to make the hint invisible, otherwise the architectural defense is not exercised (the buffer overflow is detected in the memset() rather than when it crosses the edge of the allocation). Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Nick Desaulniers Cc: Nathan Chancellor Cc: Tom Rix Cc: linux-hardening@vger.kernel.org Cc: llvm@lists.linux.dev Reviewed-by: Miguel Ojeda # include/linux/compiler_attributes.h Signed-off-by: Kees Cook --- include/linux/compiler_attributes.h | 5 +++++ include/linux/fortify-string.h | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a..56467f86a27c 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -297,6 +297,11 @@ * * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size */ +#if __has_attribute(__pass_dynamic_object_size__) +# define __pass_dynamic_object_size(type) __attribute__((__pass_dynamic_object_size__(type))) +#else +# define __pass_dynamic_object_size(type) +#endif #if __has_attribute(__pass_object_size__) # define __pass_object_size(type) __attribute__((__pass_object_size__(type))) #else diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 7cad8bb031e9..c9de1f59ee80 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -90,10 +90,17 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) * size, rather than struct size), but there remain some stragglers using * type 0 that will be converted in the future. */ +#if __has_builtin(__builtin_dynamic_object_size) +#define POS __pass_dynamic_object_size(1) +#define POS0 __pass_dynamic_object_size(0) +#define __struct_size(p) __builtin_dynamic_object_size(p, 0) +#define __member_size(p) __builtin_dynamic_object_size(p, 1) +#else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) #define __struct_size(p) __builtin_object_size(p, 0) #define __member_size(p) __builtin_object_size(p, 1) +#endif #define __compiletime_lessthan(bounds, length) ( \ __builtin_constant_p((bounds) < (length)) && \ -- cgit v1.2.3 From 3ca0a6ea8f6de0f21a9331ff3596a9c073fbdab0 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Nov 2022 17:25:04 +0800 Subject: sched: Add helper kstat_cpu_softirqs_sum() Add a kstat_cpu_softirqs_sum() function that is similar to kstat_cpu_irqs_sum(), but which counts software interrupts since boot on the specified CPU. Signed-off-by: Zhen Lei Cc: Josh Don Cc: Tejun Heo Cc: Peter Zijlstra Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/kernel_stat.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 90e2fdc17d79..898076e173a9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,6 +67,17 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) return kstat_cpu(cpu).softirqs[irq]; } +static inline unsigned int kstat_cpu_softirqs_sum(int cpu) +{ + int i; + unsigned int sum = 0; + + for (i = 0; i < NR_SOFTIRQS; i++) + sum += kstat_softirqs_cpu(i, cpu); + + return sum; +} + /* * Number of interrupts per specific IRQ source, since bootup */ -- cgit v1.2.3 From 7c182722a0a9447e31f9645de4f311e5bc59b480 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Nov 2022 17:25:05 +0800 Subject: sched: Add helper nr_context_switches_cpu() Add a function nr_context_switches_cpu() that returns number of context switches since boot on the specified CPU. This information will be used to diagnose RCU CPU stalls. Signed-off-by: Zhen Lei Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Cc: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/kernel_stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 898076e173a9..9935f7ecbfb9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -52,6 +52,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) +extern unsigned long long nr_context_switches_cpu(int cpu); extern unsigned long long nr_context_switches(void); extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); -- cgit v1.2.3 From 589c3357370a596ef7c99c00baca8ac799fce531 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 11 Dec 2022 23:06:19 -0800 Subject: PCI/CXL: Export native CXL error reporting control CXL _OSC Error Reporting Control is used by the OS to determine if Firmware has control of various CXL error reporting capabilities including the event logs. Expose the result of negotiating CXL Error Reporting Control in struct pci_host_bridge for consumption by the CXL drivers. Cc: Bjorn Helgaas Cc: Lukas Wunner Cc: linux-pci@vger.kernel.org Cc: linux-acpi@vger.kernel.org Signed-off-by: Ira Weiny Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20221212070627.1372402-2-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..22319ea71ab0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -578,6 +578,7 @@ struct pci_host_bridge { unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ unsigned int native_dpc:1; /* OS may use PCIe DPC */ + unsigned int native_cxl_error:1; /* OS may use CXL RAS/Events */ unsigned int preserve_config:1; /* Preserve FW resource setup */ unsigned int size_windows:1; /* Enable root bus sizing */ unsigned int msi_domain:1; /* Bridge wants MSI domain */ -- cgit v1.2.3 From 37e2b57078d412547f1c14bb83eff86ab9d92d3d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 3 Sep 2022 00:08:51 +0100 Subject: exportfs: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. So, replace zero-length array declaration in struct fid with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for a flexible-array member in a union. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/197 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3..1640d97e4c0d 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -131,7 +131,7 @@ struct fid { u32 parent_block; u32 parent_generation; } udf; - __u32 raw[0]; + DECLARE_FLEX_ARRAY(__u32, raw); }; }; -- cgit v1.2.3 From 06919d226d01132c03d851966d4df3870fa1b55a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 3 Sep 2022 00:17:18 +0100 Subject: mm/memremap: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length array declaration in struct dev_pagemap with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for a flexible-array member in a union. Also, this addresses multiple warnings reported when building with Clang-15 and -Wzero-length-array. Link: https://github.com/KSPP/linux/issues/193 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- include/linux/memremap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 7fcaf3180a5b..1314d9c5f05b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -135,7 +135,7 @@ struct dev_pagemap { int nr_range; union { struct range range; - struct range ranges[0]; + DECLARE_FLEX_ARRAY(struct range, ranges); }; }; -- cgit v1.2.3 From d90fa2c64d59f5f151beeef5dbc599784b3391ca Mon Sep 17 00:00:00 2001 From: Rob Barnes Date: Wed, 4 Jan 2023 01:15:23 +0000 Subject: platform/chrome: cros_ec: Poll EC log on EC panic Add handler for CrOS EC panic events. When a panic is reported, immediately poll for EC log. This should result in the log leading to the EC panic being preserved. ACPI_NOTIFY_CROS_EC_PANIC is defined in coreboot at https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/ec/google/chromeec/acpi/ec.asl Signed-off-by: Rob Barnes Reviewed-by: Prashant Malani Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20230104011524.369764-2-robbarnes@google.com --- include/linux/platform_data/cros_ec_proto.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index e43107e0bee1..7fb2196f99b0 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -41,6 +41,13 @@ #define EC_MAX_REQUEST_OVERHEAD 1 #define EC_MAX_RESPONSE_OVERHEAD 32 +/* + * EC panic is not covered by the standard (0-F) ACPI notify values. + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF + * device specific ACPI notify range. + */ +#define ACPI_NOTIFY_CROS_EC_PANIC 0xB0 + /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. */ @@ -176,6 +183,8 @@ struct cros_ec_device { /* The platform devices used by the mfd driver */ struct platform_device *ec; struct platform_device *pd; + + struct blocking_notifier_head panic_notifier; }; /** -- cgit v1.2.3 From 2c7bc10d0f7b0f66d9042ed88bb3ecd588352a00 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Jan 2023 20:05:21 -0800 Subject: netlink: add macro for checking dump ctx size We encourage casting struct netlink_callback::ctx to a local struct (in a comment above the field). Provide a convenience macro for checking if the local struct fits into the ctx. Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d81bde5a5844..38f6334f408c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -263,6 +263,10 @@ struct netlink_callback { }; }; +#define NL_ASSET_DUMP_CTX_FITS(type_name) \ + BUILD_BUG_ON(sizeof(type_name) > \ + sizeof_field(struct netlink_callback, ctx)) + struct netlink_notify { struct net *net; u32 portid; -- cgit v1.2.3 From 7c3d5c20dc169e55064f7f38c1c56cfbc39ee5b2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 3 Oct 2022 11:25:34 +0200 Subject: thermal/core: Add a generic thermal_zone_get_trip() function The thermal_zone_device_ops structure defines a set of ops family, get_trip_temp(), get_trip_hyst(), get_trip_type(). Each of them is returning a property of a trip point. The result is the code is calling the ops everywhere to get a trip point which is supposed to be defined in the backend driver. It is a non-sense as a thermal trip can be generic and used by the backend driver to declare its trip points. Part of the thermal framework has been changed and all the OF thermal drivers are using the same definition for the trip point and use a thermal zone registration variant to pass those trip points which are part of the thermal zone device structure. Consequently, we can use a generic function to get the trip points when they are stored in the thermal zone device structure. This approach can be generalized to all the drivers and we can get rid of the ops->get_trip_*. That will result to a much more simpler code and make possible to rework how the thermal trip are handled in the thermal core framework as discussed previously. This change adds a function thermal_zone_get_trip() where we get the thermal trip point structure which contains all the properties (type, temp, hyst) instead of doing multiple calls to ops->get_trip_*. That opens the door for trip point extension with more attributes. For instance, replacing the trip points disabled bitmask with a 'disabled' field in the structure. Here we replace all the calls to ops->get_trip_* in the thermal core code with a call to the thermal_zone_get_trip() function. The thermal zone ops defines a callback to retrieve the critical temperature. As the trip handling is being reworked, all the trip points will be the same whatever the driver and consequently finding the critical trip temperature will be just a loop to search for a critical trip point type. Provide such a generic function, so we encapsulate the ops get_crit_temp() which can be removed when all the backend drivers are using the generic trip points handling. While at it, add the thermal_zone_get_num_trips() to encapsulate the code more and reduce the grip with the thermal framework internals. Signed-off-by: Daniel Lezcano Acked-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Link: https://lore.kernel.org/r/20221003092602.1323944-2-daniel.lezcano@linaro.org --- include/linux/thermal.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5e093602e8fc..2198b3631f61 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -334,6 +334,13 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, } #endif +int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, + struct thermal_trip *trip); + +int thermal_zone_get_num_trips(struct thermal_zone_device *tz); + +int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); + #ifdef CONFIG_THERMAL struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, -- cgit v1.2.3 From 2e38a2a981b24a9e86e687d32708a3d02707c8f6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 3 Oct 2022 11:25:36 +0200 Subject: thermal/core: Add a generic thermal_zone_set_trip() function The thermal zone ops defines a set_trip callback where we can invoke the backend driver to set an interrupt for the next trip point temperature being crossed the way up or down, or setting the low level with the hysteresis. The ops is only called from the thermal sysfs code where the userspace has the ability to modify a trip point characteristic. With the effort of encapsulating the thermal framework core code, let's create a thermal_zone_set_trip() which is the writable side of the thermal_zone_get_trip() and put there all the ops encapsulation. Signed-off-by: Daniel Lezcano Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221003092602.1323944-4-daniel.lezcano@linaro.org --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2198b3631f61..e2797f314d99 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -337,6 +337,9 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); +int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, + const struct thermal_trip *trip); + int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); -- cgit v1.2.3 From e6ec64f85237b48c071158617cfc954a30285fc7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Dec 2022 14:16:14 +0100 Subject: thermal/drivers/qcom: Fix set_trip_temp() deadlock The set_trip_temp() callback is used when changing the trip temperature through sysfs. As it is called with the thermal-zone-device lock held it must not use thermal_zone_get_trip() directly or it will deadlock. Fixes: 78c3e2429be8 ("thermal/drivers/qcom: Use generic thermal_zone_get_trip() function") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221214131617.2447-2-johan+linaro@kernel.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e2797f314d99..30353e4b1424 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -334,6 +334,8 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, } #endif +int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, + struct thermal_trip *trip); int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); -- cgit v1.2.3 From 4d70c74659d9746502b23d055dba03d1d28ec388 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Nov 2022 15:48:35 +0200 Subject: i915: Move list_count() to list.h as list_count_nodes() for broader use Some of the existing users, and definitely will be new ones, want to count existing nodes in the list. Provide a generic API for that by moving code from i915 to list.h. Reviewed-by: Lucas De Marchi Acked-by: Jani Nikula Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221130134838.23805-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/list.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 61762054b4be..f10344dbad4d 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -655,6 +655,21 @@ static inline void list_splice_tail_init(struct list_head *list, !list_is_head(pos, (head)); \ pos = n, n = pos->prev) +/** + * list_count_nodes - count nodes in the list + * @head: the head for your list. + */ +static inline size_t list_count_nodes(struct list_head *head) +{ + struct list_head *pos; + size_t count = 0; + + list_for_each(pos, head) + count++; + + return count; +} + /** * list_entry_is_head - test if the entry points to the head of the list * @pos: the type * to cursor -- cgit v1.2.3 From 9b9ad70f28675c45ef93cfb6e4af9403cb7bd34c Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 6 Jan 2023 17:40:42 +0530 Subject: remoteproc: pru: Add enum for PRU Core Identifiers. Introducing enum pruss_pru_id for PRU Core Identifiers. PRUSS_PRU0 indicates PRU Core 0. PRUSS_PRU1 indicates PRU Core 1. PRUSS_NUM_PRUS indicates the total number of PRU Cores. Signed-off-by: MD Danish Anwar Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230106121046.886863-3-danishanwar@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc/pruss.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/remoteproc/pruss.h (limited to 'include/linux') diff --git a/include/linux/remoteproc/pruss.h b/include/linux/remoteproc/pruss.h new file mode 100644 index 000000000000..fbadfcfacb34 --- /dev/null +++ b/include/linux/remoteproc/pruss.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PRU-ICSS Subsystem user interfaces + * + * Copyright (C) 2015-2022 Texas Instruments Incorporated - http://www.ti.com + * Suman Anna + */ + +#ifndef __LINUX_PRUSS_H +#define __LINUX_PRUSS_H + +#include +#include + +#define PRU_RPROC_DRVNAME "pru-rproc" + +/** + * enum pruss_pru_id - PRU core identifiers + * @PRUSS_PRU0: PRU Core 0. + * @PRUSS_PRU1: PRU Core 1. + * @PRUSS_NUM_PRUS: Total number of PRU Cores available. + * + */ + +enum pruss_pru_id { + PRUSS_PRU0 = 0, + PRUSS_PRU1, + PRUSS_NUM_PRUS, +}; + +#endif /* __LINUX_PRUSS_H */ -- cgit v1.2.3 From 919e8942548aa878d374b1b51aa68fdf751f18b8 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 6 Jan 2023 17:40:43 +0530 Subject: remoteproc: pru: Add APIs to get and put the PRU cores Add two new APIs, pru_rproc_get() and pru_rproc_put(), to the PRU driver to allow client drivers to acquire and release the remoteproc device associated with a PRU core. The PRU cores are treated as resources with only one client owning it at a time. The pru_rproc_get() function returns the rproc handle corresponding to a PRU core identified by the device tree "ti,prus" property under the client node. The pru_rproc_put() is the complementary function to pru_rproc_get(). Signed-off-by: Suman Anna Signed-off-by: Tero Kristo Signed-off-by: Grzegorz Jaszczyk Signed-off-by: MD Danish Anwar Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230106121046.886863-4-danishanwar@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc/pruss.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc/pruss.h b/include/linux/remoteproc/pruss.h index fbadfcfacb34..579dafbbaccf 100644 --- a/include/linux/remoteproc/pruss.h +++ b/include/linux/remoteproc/pruss.h @@ -28,4 +28,34 @@ enum pruss_pru_id { PRUSS_NUM_PRUS, }; +struct device_node; + +#if IS_ENABLED(CONFIG_PRU_REMOTEPROC) + +struct rproc *pru_rproc_get(struct device_node *np, int index, + enum pruss_pru_id *pru_id); +void pru_rproc_put(struct rproc *rproc); + +#else + +static inline struct rproc * +pru_rproc_get(struct device_node *np, int index, enum pruss_pru_id *pru_id) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void pru_rproc_put(struct rproc *rproc) { } + +#endif /* CONFIG_PRU_REMOTEPROC */ + +static inline bool is_pru_rproc(struct device *dev) +{ + const char *drv_name = dev_driver_string(dev); + + if (strncmp(drv_name, PRU_RPROC_DRVNAME, sizeof(PRU_RPROC_DRVNAME))) + return false; + + return true; +} + #endif /* __LINUX_PRUSS_H */ -- cgit v1.2.3 From 102853400321baea2527917e6e89be33508c3e18 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 6 Jan 2023 17:40:45 +0530 Subject: remoteproc: pru: Add pru_rproc_set_ctable() function Some firmwares expect the OS drivers to configure the CTABLE entries publishing dynamically allocated memory regions. For example, the PRU Ethernet firmwares use the C28 and C30 entries for retrieving the Shared RAM and System SRAM (OCMC) areas allocated by the PRU Ethernet client driver. Provide a way for users to do that through a new API, pru_rproc_set_ctable(). The API returns 0 on success and a negative value on error. NOTE: The programmable CTABLE entries are typically re-programmed by the PRU firmwares when dealing with a certain block of memory during block processing. This API provides an interface to the PRU client drivers to publish a dynamically allocated memory block with the PRU firmware using a CTABLE entry instead of a negotiated address in shared memory. Additional synchronization may be needed between the PRU client drivers and firmwares if different addresses needs to be published at run-time reusing the same CTABLE entry. CTABLE for stands for "constant table". Each CTable entry just holds the upper address bits so PRU can reference to external memory with larger address bits. For use case please see prueth_sw_emac_config() in "drivers/net/ethernet/ti/prueth_switch.c" /* Set in constant table C28 of PRUn to ICSS Shared memory */ pru_rproc_set_ctable(prueth->pru0, PRU_C28, sharedramaddr); pru_rproc_set_ctable(prueth->pru1, PRU_C28, sharedramaddr); /* Set in constant table C30 of PRUn to OCMC memory */ pru_rproc_set_ctable(prueth->pru0, PRU_C30, ocmcaddr); pru_rproc_set_ctable(prueth->pru1, PRU_C30, ocmcaddr); Signed-off-by: Andrew F. Davis Signed-off-by: Suman Anna Signed-off-by: Roger Quadros Signed-off-by: Grzegorz Jaszczyk Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20230106121046.886863-6-danishanwar@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc/pruss.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc/pruss.h b/include/linux/remoteproc/pruss.h index 579dafbbaccf..039b50d58df2 100644 --- a/include/linux/remoteproc/pruss.h +++ b/include/linux/remoteproc/pruss.h @@ -28,13 +28,29 @@ enum pruss_pru_id { PRUSS_NUM_PRUS, }; +/* + * enum pru_ctable_idx - Configurable Constant table index identifiers + */ +enum pru_ctable_idx { + PRU_C24 = 0, + PRU_C25, + PRU_C26, + PRU_C27, + PRU_C28, + PRU_C29, + PRU_C30, + PRU_C31, +}; + struct device_node; +struct rproc; #if IS_ENABLED(CONFIG_PRU_REMOTEPROC) struct rproc *pru_rproc_get(struct device_node *np, int index, enum pruss_pru_id *pru_id); void pru_rproc_put(struct rproc *rproc); +int pru_rproc_set_ctable(struct rproc *rproc, enum pru_ctable_idx c, u32 addr); #else @@ -46,6 +62,12 @@ pru_rproc_get(struct device_node *np, int index, enum pruss_pru_id *pru_id) static inline void pru_rproc_put(struct rproc *rproc) { } +static inline int pru_rproc_set_ctable(struct rproc *rproc, + enum pru_ctable_idx c, u32 addr) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_PRU_REMOTEPROC */ static inline bool is_pru_rproc(struct device *dev) -- cgit v1.2.3 From 33ae3d0955943ac5bacfcb6911cf7cb74822bf8c Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Fri, 6 Jan 2023 23:28:03 +0800 Subject: soc: qcom: apr: make remove callback of apr driver void returned Since commit fc7a6209d571 ("bus: Make remove callback return void") forces bus_type::remove be void-returned, it doesn't make much sense for any bus based driver implementing remove callbalk to return non-void to its caller. As such, change the remove function for apr bus based drivers to return void. Signed-off-by: Dawei Li Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/TYCP286MB23232B7968D34DB8323B0F16CAFB9@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM --- include/linux/soc/qcom/apr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index 23c5b30f3511..be98aebcb3e1 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -153,7 +153,7 @@ typedef struct apr_device gpr_device_t; struct apr_driver { int (*probe)(struct apr_device *sl); - int (*remove)(struct apr_device *sl); + void (*remove)(struct apr_device *sl); int (*callback)(struct apr_device *a, struct apr_resp_pkt *d); int (*gpr_callback)(struct gpr_resp_pkt *d, void *data, int op); -- cgit v1.2.3 From 2ad9bd8332ac1bc072cc7d785e7cb09d6ad36f4c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Jan 2023 11:07:30 -0800 Subject: iov: add import_ubuf() Like import_single_range(), but for ITER_UBUF. Signed-off-by: Jens Axboe Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig --- include/linux/uio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 9f158238edba..73b1d5d1e4f1 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -346,6 +346,7 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, struct iov_iter *i, bool compat); int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, void __user *buf, size_t count) -- cgit v1.2.3 From b6c00fb9949fbd073e651a77aa75faca978cf2a6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:41 -0800 Subject: perf: Add PMU_FORMAT_ATTR_SHOW The macro PMU_FORMAT_ATTR facilitates the definition of both the "show" function and "format_attr". But it only works for a non-hybrid platform. For a hybrid platform, the name "format_attr_hybrid_" is used. The definition of the "show" function can be shared between a non-hybrid platform and a hybrid platform. Add a new macro PMU_FORMAT_ATTR_SHOW. No functional change. The PMU_FORMAT_ATTR_SHOW will be used in the following patch. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230104201349.1451191-1-kan.liang@linux.intel.com --- include/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c6a3bac76966..ad92ad37600e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1724,7 +1724,7 @@ static struct perf_pmu_events_attr _var = { \ .id = _id, } \ })[0].attr.attr) -#define PMU_FORMAT_ATTR(_name, _format) \ +#define PMU_FORMAT_ATTR_SHOW(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ struct device_attribute *attr, \ @@ -1733,6 +1733,9 @@ _name##_show(struct device *dev, \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ + +#define PMU_FORMAT_ATTR(_name, _format) \ + PMU_FORMAT_ATTR_SHOW(_name, _format) \ \ static struct device_attribute format_attr_##_name = __ATTR_RO(_name) -- cgit v1.2.3 From 7bdb1767bf011c7f6065ac483ad2f00e434c3979 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Dec 2022 12:40:59 -0800 Subject: perf/core: Change the layout of perf_sample_data The layout of perf_sample_data is designed to minimize cache-line access. The perf_sample_data_init() used to initialize a couple of fields unconditionally so they were placed together at the head. But it's changed now to set the fields according to the actual sample_type flags. The main user (the perf tools) sets the IP, TID, TIME, PERIOD always. Also group relevant fields like addr, phys_addr and data_page_size. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20221229204101.1099430-1-namhyung@kernel.org --- include/linux/perf_event.h | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ad92ad37600e..03949d017ac9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1098,47 +1098,51 @@ extern u64 perf_event_read_value(struct perf_event *event, struct perf_sample_data { /* - * Fields set by perf_sample_data_init(), group so as to - * minimize the cachelines touched. + * Fields set by perf_sample_data_init() unconditionally, + * group so as to minimize the cachelines touched. */ u64 sample_flags; u64 period; /* - * The other fields, optionally {set,used} by - * perf_{prepare,output}_sample(). + * Fields commonly set by __perf_event_header__init_id(), + * group so as to minimize the cachelines touched. */ - struct perf_branch_stack *br_stack; - union perf_sample_weight weight; - union perf_mem_data_src data_src; - u64 txn; - u64 addr; - struct perf_raw_record *raw; - u64 type; - u64 ip; struct { u32 pid; u32 tid; } tid_entry; u64 time; u64 id; - u64 stream_id; struct { u32 cpu; u32 reserved; } cpu_entry; + + /* + * The other fields, optionally {set,used} by + * perf_{prepare,output}_sample(). + */ + u64 ip; struct perf_callchain_entry *callchain; - u64 aux_size; + struct perf_raw_record *raw; + struct perf_branch_stack *br_stack; + union perf_sample_weight weight; + union perf_mem_data_src data_src; + u64 txn; struct perf_regs regs_user; struct perf_regs regs_intr; u64 stack_user_size; - u64 phys_addr; + u64 stream_id; u64 cgroup; + u64 addr; + u64 phys_addr; u64 data_page_size; u64 code_page_size; + u64 aux_size; } ____cacheline_aligned; /* default value for data source */ -- cgit v1.2.3 From 00a5d41ee1b05a8f0c75e1f7e26d363f4c68420e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Sep 2022 14:19:08 +0200 Subject: ARM: omap2: smartreflex: remove on_init control Nothing calls omap_enable_smartreflex_on_init() any more, so it does not need to be tracked either. Acked-by: Tony Lindgren Signed-off-by: Arnd Bergmann --- include/linux/power/smartreflex.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h index 167b9b040091..3a2c79dfc1ff 100644 --- a/include/linux/power/smartreflex.h +++ b/include/linux/power/smartreflex.h @@ -273,8 +273,6 @@ struct omap_sr_nvalue_table { * @senn_avgweight SENNAVGWEIGHT value of the sr AVGWEIGHT register * @senp_avgweight SENPAVGWEIGHT value of the sr AVGWEIGHT register * @nvalue_count: Number of distinct nvalues in the nvalue table - * @enable_on_init: whether this sr module needs to enabled at - * boot up or not. * @nvalue_table: table containing the efuse offsets and nvalues * corresponding to them. * @voltdm: Pointer to the voltage domain associated with the SR @@ -290,7 +288,6 @@ struct omap_sr_data { u32 senn_avgweight; u32 senp_avgweight; int nvalue_count; - bool enable_on_init; struct omap_sr_nvalue_table *nvalue_table; struct voltagedomain *voltdm; }; -- cgit v1.2.3 From 6aeb51c1035c1c9dd666897892d5cb168933ce7b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Sep 2022 17:09:42 +0200 Subject: ARM: omap2: make functions static A number of functions are only called from the file they are defined in, so remove the extern declarations and make them local to those files. Acked-by: Tony Lindgren Signed-off-by: Arnd Bergmann --- include/linux/platform_data/voltage-omap.h | 1 - include/linux/usb/musb.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/voltage-omap.h b/include/linux/platform_data/voltage-omap.h index 43e8da9fb447..6d74e507dbd2 100644 --- a/include/linux/platform_data/voltage-omap.h +++ b/include/linux/platform_data/voltage-omap.h @@ -29,7 +29,6 @@ struct omap_volt_data { struct voltagedomain; struct voltagedomain *voltdm_lookup(const char *name); -int voltdm_scale(struct voltagedomain *voltdm, unsigned long target_volt); unsigned long voltdm_get_voltage(struct voltagedomain *voltdm); struct omap_volt_data *omap_voltage_get_voltdata(struct voltagedomain *voltdm, unsigned long volt); diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index fc6c77918481..e4a3ad3c800f 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -143,8 +143,6 @@ extern int __init tusb6010_setup_interface( unsigned async_cs, unsigned sync_cs, unsigned irq, unsigned dmachan); -extern int tusb6010_platform_retime(unsigned is_refclk); - #endif /* OMAP2 */ #endif /* __LINUX_USB_MUSB_H */ -- cgit v1.2.3 From be505ba8fe90068868bc084cad2079a38486b2d5 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 18 Nov 2022 10:58:06 +0800 Subject: ASoC/soundwire: remove is_sdca boolean property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Device_ID registers already tell us if a device supports the SDCA specification or not, in hindsight we never needed a property when the information is reported by both hardware and ACPI. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Reviewed-by: Charles Keepax Acked-by: Mark Brown Link: https://lore.kernel.org/r/20221118025807.534863-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9e4537f409c2..8fb458931772 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -365,7 +365,6 @@ struct sdw_dpn_prop { * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification - * @is_sdca: the Slave supports the SDCA specification */ struct sdw_slave_prop { u32 mipi_revision; @@ -389,7 +388,6 @@ struct sdw_slave_prop { struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; u32 quirks; - bool is_sdca; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) -- cgit v1.2.3 From ffa1726589a7cc4bd96a7f19f43cecdb7342478f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 18 Nov 2022 10:58:07 +0800 Subject: soundwire: enable optional clock registers for SoundWire 1.2 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bus supports the mandatory clock registers for SDCA devices, these registers can also be optionally supported by SoundWire 1.2 devices that don't follow the SDCA class specification. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20221118025807.534863-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 8fb458931772..9a49263c53cf 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -365,6 +365,9 @@ struct sdw_dpn_prop { * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @clock_reg_supported: the Peripheral implements the clock base and scale + * registers introduced with the SoundWire 1.2 specification. SDCA devices + * do not need to set this boolean property as the registers are required. */ struct sdw_slave_prop { u32 mipi_revision; @@ -388,6 +391,7 @@ struct sdw_slave_prop { struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; u32 quirks; + bool clock_reg_supported; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) -- cgit v1.2.3 From 62dc9f3f2fd07a2d4f4c97d76403f387363cb637 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 25 Nov 2022 14:20:25 +0000 Subject: soundwire: bus: export sdw_nwrite_no_pm and sdw_nread_no_pm functions The commit 167790abb90f ("soundwire: export sdw_write/read_no_pm functions") exposed the single byte no_pm versions of the IO functions that can be used without touching PM, export the multi byte no_pm versions for the same reason. Reviewed-by: Pierre-Louis Bossart Signed-off-by: Simon Trimmer Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125142028.1118618-2-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9a49263c53cf..13019e3904b6 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -1049,7 +1049,9 @@ int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value); int sdw_read_no_pm(struct sdw_slave *slave, u32 addr); int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); +int sdw_nread_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val); +int sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val); int sdw_update(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); -- cgit v1.2.3 From 6726b47a2a60e3f2ef15ee7c4c6d9caa27770576 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Nov 2022 14:20:26 +0000 Subject: soundwire: Provide build stubs for common functions Provide stub functions when CONFIG_SOUNDWIRE is not set for functions that are quite likely to be used from common code on devices supporting multiple control buses. Reviewed-by: Pierre-Louis Bossart Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125142028.1118618-3-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 105 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 13019e3904b6..3cd2a761911f 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -4,6 +4,7 @@ #ifndef __SOUNDWIRE_H #define __SOUNDWIRE_H +#include #include #include @@ -1023,15 +1024,8 @@ int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); -int sdw_stream_add_slave(struct sdw_slave *slave, - struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, - unsigned int num_ports, - struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, struct sdw_stream_runtime *stream); -int sdw_stream_remove_slave(struct sdw_slave *slave, - struct sdw_stream_runtime *stream); int sdw_startup_stream(void *sdw_substream); int sdw_prepare_stream(struct sdw_stream_runtime *stream); int sdw_enable_stream(struct sdw_stream_runtime *stream); @@ -1042,8 +1036,20 @@ int sdw_bus_prep_clk_stop(struct sdw_bus *bus); int sdw_bus_clk_stop(struct sdw_bus *bus); int sdw_bus_exit_clk_stop(struct sdw_bus *bus); -/* messaging and data APIs */ +int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); +void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +#if IS_ENABLED(CONFIG_SOUNDWIRE) + +int sdw_stream_add_slave(struct sdw_slave *slave, + struct sdw_stream_config *stream_config, + struct sdw_port_config *port_config, + unsigned int num_ports, + struct sdw_stream_runtime *stream); +int sdw_stream_remove_slave(struct sdw_slave *slave, + struct sdw_stream_runtime *stream); + +/* messaging and data APIs */ int sdw_read(struct sdw_slave *slave, u32 addr); int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value); @@ -1055,7 +1061,86 @@ int sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 * int sdw_update(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); -int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); -void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +#else + +static inline int sdw_stream_add_slave(struct sdw_slave *slave, + struct sdw_stream_config *stream_config, + struct sdw_port_config *port_config, + unsigned int num_ports, + struct sdw_stream_runtime *stream) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_stream_remove_slave(struct sdw_slave *slave, + struct sdw_stream_runtime *stream) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +/* messaging and data APIs */ +static inline int sdw_read(struct sdw_slave *slave, u32 addr) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_write(struct sdw_slave *slave, u32 addr, u8 value) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_read_no_pm(struct sdw_slave *slave, u32 addr) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nread_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_update(struct sdw_slave *slave, u32 addr, u8 mask, u8 val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +#endif /* CONFIG_SOUNDWIRE */ #endif /* __SOUNDWIRE_H */ -- cgit v1.2.3 From 0ac7200e3317bdde7b96112f24b9253208c0258b Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 28 Dec 2022 00:45:04 +0000 Subject: Revert "mfd: cros_ec: Add SCP Core-1 as a new CrOS EC MCU" This reverts commit 66ee379d743c69c726b61d078119a34d5be96a35. The feature flag introduced by Commit 66ee379d743c ("mfd: cros_ec: Add SCP Core-1 as a new CrOS EC MCU") was not first added in the source EC code base[1]. This can lead to the possible misinterpration of an EC's supported feature set, as well as causes issues with all future feature flag updates. [1] https://source.chromium.org/chromium/chromiumos/platform/ec/+/main:include/ec_commands.h Signed-off-by: Prashant Malani Acked-by: Lee Jones Reviewed-by: Benson Leung Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221228004648.793339-2-pmalani@chromium.org --- include/linux/platform_data/cros_ec_commands.h | 2 -- include/linux/platform_data/cros_ec_proto.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 5744a2d746aa..7c94bf5c8f05 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1300,8 +1300,6 @@ enum ec_feature_code { * mux. */ EC_FEATURE_TYPEC_MUX_REQUIRE_AP_ACK = 43, - /* The MCU is a System Companion Processor (SCP) 2nd Core. */ - EC_FEATURE_SCP_C1 = 45, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 7fb2196f99b0..017d502ed66e 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -19,7 +19,6 @@ #define CROS_EC_DEV_ISH_NAME "cros_ish" #define CROS_EC_DEV_PD_NAME "cros_pd" #define CROS_EC_DEV_SCP_NAME "cros_scp" -#define CROS_EC_DEV_SCP_C1_NAME "cros_scp_c1" #define CROS_EC_DEV_TP_NAME "cros_tp" #define CROS_EC_DEV_EC_INDEX 0 -- cgit v1.2.3 From 0e0dba884c4318c433756118794a7dff8947e6ce Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 28 Dec 2022 00:45:05 +0000 Subject: platform_chrome: cros_ec: Add Type-C VDM defines Add the EC header changes need to support USB Type-C VDM (Vendor Defined Messages) communication between the system and USB PD-enabled peripherals. The headers are already present in the EC code base, from which they've been ported [1]. [1] https://source.chromium.org/chromium/chromiumos/platform/ec/+/main:include/ec_commands.h Signed-off-by: Prashant Malani Reviewed-by: Benson Leung Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221228004648.793339-3-pmalani@chromium.org --- include/linux/platform_data/cros_ec_commands.h | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 7c94bf5c8f05..6665e7da6ee2 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1300,6 +1300,18 @@ enum ec_feature_code { * mux. */ EC_FEATURE_TYPEC_MUX_REQUIRE_AP_ACK = 43, + /* + * The EC supports entering and residing in S4. + */ + EC_FEATURE_S4_RESIDENCY = 44, + /* + * The EC supports the AP directing mux sets for the board. + */ + EC_FEATURE_TYPEC_AP_MUX_SET = 45, + /* + * The EC supports the AP composing VDMs for us to send. + */ + EC_FEATURE_TYPEC_AP_VDM_SEND = 46, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) @@ -5724,6 +5736,8 @@ enum typec_control_command { TYPEC_CONTROL_COMMAND_ENTER_MODE, TYPEC_CONTROL_COMMAND_TBT_UFP_REPLY, TYPEC_CONTROL_COMMAND_USB_MUX_SET, + TYPEC_CONTROL_COMMAND_BIST_SHARE_MODE, + TYPEC_CONTROL_COMMAND_SEND_VDM_REQ, }; /* Replies the AP may specify to the TBT EnterMode command as a UFP */ @@ -5737,6 +5751,17 @@ struct typec_usb_mux_set { uint8_t mux_flags; /* USB_PD_MUX_*-encoded USB mux state to set */ } __ec_align1; +#define VDO_MAX_SIZE 7 + +struct typec_vdm_req { + /* VDM data, including VDM header */ + uint32_t vdm_data[VDO_MAX_SIZE]; + /* Number of 32-bit fields filled in */ + uint8_t vdm_data_objects; + /* Partner to address - see enum typec_partner_type */ + uint8_t partner_type; +} __ec_align1; + struct ec_params_typec_control { uint8_t port; uint8_t command; /* enum typec_control_command */ @@ -5752,6 +5777,8 @@ struct ec_params_typec_control { uint8_t mode_to_enter; /* enum typec_mode */ uint8_t tbt_ufp_reply; /* enum typec_tbt_ufp_reply */ struct typec_usb_mux_set mux_params; + /* Used for VMD_REQ */ + struct typec_vdm_req vdm_req_params; uint8_t placeholder[128]; }; } __ec_align1; @@ -5833,6 +5860,8 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_DISCONNECTED BIT(3) #define PD_STATUS_EVENT_MUX_0_SET_DONE BIT(4) #define PD_STATUS_EVENT_MUX_1_SET_DONE BIT(5) +#define PD_STATUS_EVENT_VDM_REQ_REPLY BIT(6) +#define PD_STATUS_EVENT_VDM_REQ_FAILED BIT(7) struct ec_params_typec_status { uint8_t port; @@ -5876,6 +5905,28 @@ struct ec_response_typec_status { uint32_t sink_cap_pdos[7]; /* Max 7 PDOs can be present */ } __ec_align1; +/* + * Gather the response to the most recent VDM REQ from the AP + */ +#define EC_CMD_TYPEC_VDM_RESPONSE 0x013C + +struct ec_params_typec_vdm_response { + uint8_t port; +} __ec_align1; + +struct ec_response_typec_vdm_response { + /* Number of 32-bit fields filled in */ + uint8_t vdm_data_objects; + /* Partner to address - see enum typec_partner_type */ + uint8_t partner_type; + /* Reserved */ + uint16_t reserved; + /* VDM data, including VDM header */ + uint32_t vdm_response[VDO_MAX_SIZE]; +} __ec_align1; + +#undef VDO_MAX_SIZE + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ -- cgit v1.2.3 From 101ca8d05913b7d1e6e8b9dd792193d4082fff86 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 2 Jan 2023 17:06:30 -0600 Subject: rtc: efi: Enable SET/GET WAKEUP services as optional The current implementation of rtc-efi is expecting all the 4 time services GET{SET}_TIME{WAKEUP} must be supported by UEFI firmware. As per the EFI_RT_PROPERTIES_TABLE, the platform specific implementations can choose to enable selective time services based on the RTC device capabilities. This patch does the following changes to provide GET/SET RTC services on platforms that do not support the WAKEUP feature. 1) Relax time services cap check when creating a platform device. 2) Clear RTC_FEATURE_ALARM bit in the absence of WAKEUP services. 3) Conditional alarm entries in '/proc/driver/rtc'. Cc: # v6.0+ Signed-off-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230102230630.192911-1-sdonthineni@nvidia.com Signed-off-by: Alexandre Belloni --- include/linux/efi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4b27519143f5..98598bd1d2fa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -668,7 +668,8 @@ extern struct efi { #define EFI_RT_SUPPORTED_ALL 0x3fff -#define EFI_RT_SUPPORTED_TIME_SERVICES 0x000f +#define EFI_RT_SUPPORTED_TIME_SERVICES 0x0003 +#define EFI_RT_SUPPORTED_WAKEUP_SERVICES 0x000c #define EFI_RT_SUPPORTED_VARIABLE_SERVICES 0x0070 extern struct mm_struct efi_mm; -- cgit v1.2.3 From 5306892a50bf4cd4cc945bad286c7c950078d65e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 23 Dec 2022 12:36:33 -0800 Subject: fsverity: support verification with tree block size < PAGE_SIZE Add support for verifying data from verity files whose Merkle tree block size is less than the page size. The main use case for this is to allow a single Merkle tree block size to be used across all systems, so that only one set of fsverity file digests and signatures is needed. To do this, eliminate various assumptions that the Merkle tree block size and the page size are the same: - Make fsverity_verify_page() a wrapper around a new function fsverity_verify_blocks() which verifies one or more blocks in a page. - When a Merkle tree block is needed, get the corresponding page and only verify and use the needed portion. (The Merkle tree continues to be read and cached in page-sized chunks; that doesn't need to change.) - When the Merkle tree block size and page size differ, use a bitmap fsverity_info::hash_block_verified to keep track of which Merkle tree blocks have been verified, as PageChecked cannot be used directly. Signed-off-by: Eric Biggers Reviewed-by: Andrey Albershteyn Tested-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20221223203638.41293-7-ebiggers@kernel.org --- include/linux/fsverity.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index f5ed7ecfd9ab..6ecc51f80221 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -170,7 +170,8 @@ int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg); /* verify.c */ -bool fsverity_verify_page(struct page *page); +bool fsverity_verify_blocks(struct page *page, unsigned int len, + unsigned int offset); void fsverity_verify_bio(struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); @@ -230,7 +231,8 @@ static inline int fsverity_ioctl_read_metadata(struct file *filp, /* verify.c */ -static inline bool fsverity_verify_page(struct page *page) +static inline bool fsverity_verify_blocks(struct page *page, unsigned int len, + unsigned int offset) { WARN_ON(1); return false; @@ -248,6 +250,11 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) #endif /* !CONFIG_FS_VERITY */ +static inline bool fsverity_verify_page(struct page *page) +{ + return fsverity_verify_blocks(page, PAGE_SIZE, 0); +} + /** * fsverity_active() - do reads from the inode need to go through fs-verity? * @inode: inode to check -- cgit v1.2.3 From 56124d6c87fd749477425110d2564166621a89c4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 23 Dec 2022 12:36:34 -0800 Subject: fsverity: support enabling with tree block size < PAGE_SIZE Make FS_IOC_ENABLE_VERITY support values of fsverity_enable_arg::block_size other than PAGE_SIZE. To make this possible, rework build_merkle_tree(), which was reading data and hash pages from the file and assuming that they were the same thing as "blocks". For reading the data blocks, just replace the direct pagecache access with __kernel_read(), to naturally read one block at a time. (A disadvantage of the above is that we lose the two optimizations of hashing the pagecache pages in-place and forcing the maximum readahead. That shouldn't be very important, though.) The hash block reads are a bit more difficult to handle, as the only way to do them is through fsverity_operations::read_merkle_tree_page(). Instead, let's switch to the single-pass tree construction algorithm that fsverity-utils uses. This eliminates the need to read back any hash blocks while the tree is being built, at the small cost of an extra block-sized memory buffer per Merkle tree level. This is probably what I should have done originally. Taken together, the above two changes result in page-size independent code that is also a bit simpler than what we had before. Signed-off-by: Eric Biggers Reviewed-by: Andrey Albershteyn Tested-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20221223203638.41293-8-ebiggers@kernel.org --- include/linux/fsverity.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 6ecc51f80221..991a44458996 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -93,8 +93,7 @@ struct fsverity_operations { * isn't already cached. Implementations may ignore this * argument; it's only a performance optimization. * - * This can be called at any time on an open verity file, as well as - * between ->begin_enable_verity() and ->end_enable_verity(). It may be + * This can be called at any time on an open verity file. It may be * called by multiple processes concurrently, even with the same page. * * Note that this must retrieve a *page*, not necessarily a *block*. -- cgit v1.2.3 From e7895f017b79410bf4591396a733b876dc1e0e9d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 6 Jan 2023 10:44:00 -0500 Subject: bpf: remove the do_idr_lock parameter from bpf_prog_free_id() It was determined that the do_idr_lock parameter to bpf_prog_free_id() was not necessary as it should always be true. Suggested-by: Stanislav Fomichev Signed-off-by: Paul Moore Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230106154400.74211-2-paul@paul-moore.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3de24cfb7a3d..634d37a599fa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1832,7 +1832,7 @@ void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); +void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); struct btf_field *btf_record_find(const struct btf_record *rec, -- cgit v1.2.3 From da2e552b469a0cd130ff70a88ccc4139da428a65 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 28 Nov 2022 19:05:47 +0200 Subject: net/mlx5: Fix command stats access after free Command may fail while driver is reloading and can't accept FW commands till command interface is reinitialized. Such command failure is being logged to command stats. This results in NULL pointer access as command stats structure is being freed and reallocated during mlx5 devlink reload (see kernel log below). Fix it by making command stats statically allocated on driver probe. Kernel log: [ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0 [ 2394.810610] PGD 0 P4D 0 [ 2394.811811] Oops: 0002 [#1] SMP NOPTI ... [ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 ... [ 2394.829505] Call Trace: [ 2394.830667] _raw_spin_lock_irq+0x23/0x26 [ 2394.831858] cmd_status_err+0x55/0x110 [mlx5_core] [ 2394.833020] mlx5_access_reg+0xe7/0x150 [mlx5_core] [ 2394.834175] mlx5_query_port_ptys+0x78/0xa0 [mlx5_core] [ 2394.835337] mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core] [ 2394.836454] ? kmem_cache_alloc_trace+0x140/0x1c0 [ 2394.837562] __rh_call_get_link_ksettings+0x33/0x100 [ 2394.838663] ? __rtnl_unlock+0x25/0x50 [ 2394.839755] __ethtool_get_link_ksettings+0x72/0x150 [ 2394.840862] duplex_show+0x6e/0xc0 [ 2394.841963] dev_attr_show+0x1c/0x40 [ 2394.843048] sysfs_kf_seq_show+0x9b/0x100 [ 2394.844123] seq_read+0x153/0x410 [ 2394.845187] vfs_read+0x91/0x140 [ 2394.846226] ksys_read+0x4f/0xb0 [ 2394.847234] do_syscall_64+0x5b/0x1a0 [ 2394.848228] entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f..76ef2e4fde38 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -315,7 +315,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats *stats; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_cmd_mailbox { -- cgit v1.2.3 From 7d885863e716757553197687f304da1f538f61e1 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 9 Jan 2023 13:30:12 +0100 Subject: net: phy: allow a phy to opt-out of interrupt handling Until now, it is not possible for a PHY driver to disable interrupts during runtime. If a driver offers the .config_intr() as well as the .handle_interrupt() ops, it is eligible for interrupt handling. Introduce a new flag for the dev_flags property of struct phy_device, which can be set by PHY driver to skip interrupt setup and fall back to polling mode. At the moment, this is used for the MaxLinear PHY which has broken interrupt handling and there is a need to disable interrupts in some cases. Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6378c997ded5..742754d72fc0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -739,6 +739,9 @@ struct phy_device { #endif }; +/* Generic phy_device::dev_flags */ +#define PHY_F_NO_IRQ 0x80000000 + static inline struct phy_device *to_phy_device(const struct device *dev) { return container_of(to_mdio_device(dev), struct phy_device, mdio); -- cgit v1.2.3 From 8afbb4273977f055db219a22daaafe64be10fb95 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:05 +0100 Subject: driver core: make bus_get_device_klist() static No one calls this function outside of drivers/base/bus.c so make it static so it does not need to be exported anymore. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index d8b29ccd07e5..01077c503d61 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -287,6 +287,5 @@ extern int bus_unregister_notifier(struct bus_type *bus, #define BUS_NOTIFY_DRIVER_NOT_BOUND 0x00000008 /* driver fails to be bound */ extern struct kset *bus_get_kset(struct bus_type *bus); -extern struct klist *bus_get_device_klist(struct bus_type *bus); #endif -- cgit v1.2.3 From a9efdd2519edd7df84afd075b65ca6428dcb0039 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:06 +0100 Subject: driver core: remove subsys_find_device_by_id() This function has not been called by any code in the kernel tree in many many years so remove it as it is unused. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 01077c503d61..3b1a2066afcd 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -250,8 +250,6 @@ bus_find_device_by_acpi_dev(struct bus_type *bus, const void *adev) } #endif -struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id, - struct device *hint); int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); void bus_sort_breadthfirst(struct bus_type *bus, -- cgit v1.2.3 From 2e45fc5502af9826617a96fe63aee055002cac97 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:07 +0100 Subject: driver core: make subsys_dev_iter_init() static No one outside of drivers/base/bus.c calls this function so make it static and remove the exported symbol. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 3b1a2066afcd..67cb5b29f62d 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -154,10 +154,6 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -void subsys_dev_iter_init(struct subsys_dev_iter *iter, - struct bus_type *subsys, - struct device *start, - const struct device_type *type); struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter); void subsys_dev_iter_exit(struct subsys_dev_iter *iter); -- cgit v1.2.3 From 38cdadefa2feecc9e7aa5f67bc4aea5b9a8ca59f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:08 +0100 Subject: driver core: make subsys_dev_iter_next() static The function subsys_dev_iter_next() is only used in drivers/base/bus.c so make it static to that file and remove the global export. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 67cb5b29f62d..4f994d8fce0c 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -154,7 +154,6 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter); void subsys_dev_iter_exit(struct subsys_dev_iter *iter); int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, -- cgit v1.2.3 From af6d0743599e594cb2cec3f1a6d2600a57d1d375 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:09 +0100 Subject: driver core: make subsys_dev_iter_exit() static The function subsys_dev_iter_exit() is not used outside of drivers/base/bus.c so make it static to that file and remove the global export. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 4f994d8fce0c..848d49f4cc09 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -154,7 +154,6 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -void subsys_dev_iter_exit(struct subsys_dev_iter *iter); int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); -- cgit v1.2.3 From b0a8a59a1c44c07807afe50c6bd21a33c9ec98b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:10 +0100 Subject: driver core: move struct subsys_dev_iter to a local file struct subsys_dev_iter is not used by any code outside of drivers/base/bus.c so move it into that file and out of the global bus.h file. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-6-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 848d49f4cc09..d529f644e92b 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -150,11 +150,6 @@ int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); /* iterator helpers for buses */ -struct subsys_dev_iter { - struct klist_iter ki; - const struct device_type *type; -}; - int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(struct bus_type *bus, struct device *start, -- cgit v1.2.3 From 26b36df7516692292312063ca6fd19e73c06d4e7 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 27 Dec 2022 22:45:27 +0200 Subject: clk: Add generic sync_state callback for disabling unused clocks There are unused clocks that need to remain untouched by clk_disable_unused, and most likely could be disabled later on sync_state. So provide a generic sync_state callback for the clock providers that register such clocks. Then, use the same mechanism as clk_disable_unused from that generic callback, but pass the device to make sure only the clocks belonging to the current clock provider get disabled, if unused. Also, during the default clk_disable_unused, if the driver that registered the clock has the generic clk_sync_state_disable_unused callback set for sync_state, skip disabling its clocks. Signed-off-by: Abel Vesa Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221227204528.1899863-1-abel.vesa@linaro.org --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 842e72a5348f..cf1adfeaf257 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -720,6 +720,7 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +void clk_sync_state_disable_unused(struct device *dev); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock -- cgit v1.2.3 From a9236a0aa7d7f52a974cc7eaa971fae92aa477c5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 2 Jan 2023 16:18:27 +0530 Subject: PM: domains: Allow a genpd consumer to require a synced power off Some genpd providers doesn't ensure that it has turned off at hardware. This is fine until the consumer really requires during some special scenarios that the power domain collapse at hardware before it is turned ON again. An example is the reset sequence of Adreno GPU which requires that the 'gpucc cx gdsc' power domain should move to OFF state in hardware at least once before turning in ON again to clear the internal state. Signed-off-by: Ulf Hansson Signed-off-by: Akhil P Oommen Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230102161757.v5.1.I3e6b1f078ad0f1ca9358c573daa7b70ec132cdbe@changeid --- include/linux/pm_domain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1cd41bdf73cf..f776fb93eaa0 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -136,6 +136,7 @@ struct generic_pm_domain { unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ cpumask_var_t cpus; /* A cpumask of the attached CPUs */ + bool synced_poweroff; /* A consumer needs a synced poweroff */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct raw_notifier_head power_notifiers; /* Power on/off notifiers */ @@ -235,6 +236,7 @@ int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_genpd_remove_notifier(struct device *dev); void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next); ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); +void dev_pm_genpd_synced_poweroff(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -300,6 +302,9 @@ static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev) { return KTIME_MAX; } +static inline void dev_pm_genpd_synced_poweroff(struct device *dev) +{ } + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -- cgit v1.2.3 From 980a637d11fe8dfc734f508a422185c2de55e669 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Jan 2023 09:35:09 +0100 Subject: ARM: omap1: fix !ARCH_OMAP1_ANY link failures While compile-testing randconfig builds for the upcoming boardfile removal, I noticed that an earlier patch of mine was completely broken, and the introduction of CONFIG_ARCH_OMAP1_ANY only replaced one set of build failures with another one, now resulting in link failures like ld: drivers/video/fbdev/omap/omapfb_main.o: in function `omapfb_do_probe': drivers/video/fbdev/omap/omapfb_main.c:1703: undefined reference to `omap_set_dma_priority' ld: drivers/dma/ti/omap-dma.o: in function `omap_dma_free_chan_resources': drivers/dma/ti/omap-dma.c:777: undefined reference to `omap_free_dma' drivers/dma/ti/omap-dma.c:1685: undefined reference to `omap_get_plat_info' ld: drivers/usb/gadget/udc/omap_udc.o: in function `next_in_dma': drivers/usb/gadget/udc/omap_udc.c:820: undefined reference to `omap_get_dma_active_status' I tried reworking it, but the resulting patch ended up much bigger than simply avoiding the original problem of unused-function warnings like arch/arm/mach-omap1/mcbsp.c:76:30: error: unused variable 'omap1_mcbsp_ops' [-Werror,-Wunused-variable] As a result, revert the previous fix, and rearrange the code that produces warnings to hide them. For mcbsp, the #ifdef check can simply be removed as the cpu_is_omapxxx() checks already achieve the same result, while in the io.c the easiest solution appears to be to merge the common map bits into each soc specific portion. This gets cleaned in a nicer way after omap7xx support gets dropped, as the remaining SoCs all have the exact same I/O map. Fixes: 615dce5bf736 ("ARM: omap1: fix build with no SoC selected") Cc: stable@vger.kernel.org Acked-by: Aaro Koskinen Signed-off-by: Arnd Bergmann --- include/linux/soc/ti/omap1-io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/omap1-io.h b/include/linux/soc/ti/omap1-io.h index f7f12728d4a6..9a60f45899d3 100644 --- a/include/linux/soc/ti/omap1-io.h +++ b/include/linux/soc/ti/omap1-io.h @@ -5,7 +5,7 @@ #ifndef __ASSEMBLER__ #include -#ifdef CONFIG_ARCH_OMAP1_ANY +#ifdef CONFIG_ARCH_OMAP1 /* * NOTE: Please use ioremap + __raw_read/write where possible instead of these */ @@ -15,7 +15,7 @@ extern u32 omap_readl(u32 pa); extern void omap_writeb(u8 v, u32 pa); extern void omap_writew(u16 v, u32 pa); extern void omap_writel(u32 v, u32 pa); -#else +#elif defined(CONFIG_COMPILE_TEST) static inline u8 omap_readb(u32 pa) { return 0; } static inline u16 omap_readw(u32 pa) { return 0; } static inline u32 omap_readl(u32 pa) { return 0; } -- cgit v1.2.3 From 129d868ede1ea835ac97f0a9f2cf3f4ed00d8ea7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Jan 2023 11:26:47 -0800 Subject: bpf: Replace 0-length arrays with flexible arrays Zero-length arrays are deprecated [1]. Replace struct bpf_array's union of 0-length arrays with flexible arrays. Detected with GCC 13, by using -fstrict-flex-arrays=3: arch/x86/net/bpf_jit_comp.c: In function 'bpf_tail_call_direct_fixup': arch/x86/net/bpf_jit_comp.c:606:37: warning: array subscript is outside array bounds of 'void *[0]' [-Warray-bounds=] 606 | target = array->ptrs[poke->tail_call.key]; | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~ In file included from include/linux/filter.h:9, from arch/x86/net/bpf_jit_comp.c:9: include/linux/bpf.h:1527:23: note: while referencing 'ptrs' 1527 | void *ptrs[0] __aligned(8); | ^~~~ [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Kees Cook Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230105192646.never.154-kees@kernel.org --- include/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1697bd87fc06..ae7771c7d750 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1523,9 +1523,9 @@ struct bpf_array { u32 index_mask; struct bpf_array_aux *aux; union { - char value[0] __aligned(8); - void *ptrs[0] __aligned(8); - void __percpu *pptrs[0] __aligned(8); + DECLARE_FLEX_ARRAY(char, value) __aligned(8); + DECLARE_FLEX_ARRAY(void *, ptrs) __aligned(8); + DECLARE_FLEX_ARRAY(void __percpu *, pptrs) __aligned(8); }; }; -- cgit v1.2.3 From 4e4aafcddbbfcdd6eed5780e190fcbfac8b4685a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 9 Jan 2023 16:30:41 +0100 Subject: net: mdio: Add dedicated C45 API to MDIO bus drivers Currently C22 and C45 transactions are mixed over a combined API calls which make use of a special bit in the reg address to indicate if a C45 transaction should be performed. This makes it impossible to know if the bus driver actually supports C45. Additionally, many C22 only drivers don't return -EOPNOTSUPP when asked to perform a C45 transaction, they mistaking perform a C22 transaction. This is the first step to cleanly separate C22 from C45. To maintain backwards compatibility until all drivers which are capable of performing C45 are converted to this new API, the helper functions will fall back to the older API if the new API is not supported. Eventually this fallback will be removed. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 39 ++++++++++++++++++++------------------- include/linux/phy.h | 5 +++++ 2 files changed, 25 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index f7fbbf3069e7..1e78c8410b21 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -423,6 +423,17 @@ int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); +int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum); +int mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum); +int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 val); +int mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 val); +int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 mask, u16 set); + +int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad, + u32 regnum, u16 mask, u16 set); static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum) { @@ -463,29 +474,19 @@ static inline u16 mdiobus_c45_devad(u32 regnum) return FIELD_GET(MII_DEVADDR_C45_MASK, regnum); } -static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, - u16 regnum) +static inline int mdiodev_c45_modify(struct mdio_device *mdiodev, int devad, + u32 regnum, u16 mask, u16 set) { - return __mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); + return mdiobus_c45_modify(mdiodev->bus, mdiodev->addr, devad, regnum, + mask, set); } -static inline int __mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, - u16 regnum, u16 val) +static inline int mdiodev_c45_modify_changed(struct mdio_device *mdiodev, + int devad, u32 regnum, u16 mask, + u16 set) { - return __mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), - val); -} - -static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, - u16 regnum) -{ - return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); -} - -static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, - u16 regnum, u16 val) -{ - return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val); + return mdiobus_c45_modify_changed(mdiodev->bus, mdiodev->addr, devad, + regnum, mask, set); } static inline int mdiodev_c45_read(struct mdio_device *mdiodev, int devad, diff --git a/include/linux/phy.h b/include/linux/phy.h index 742754d72fc0..89b43cda5bda 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -364,6 +364,11 @@ struct mii_bus { int (*read)(struct mii_bus *bus, int addr, int regnum); /** @write: Perform a write transfer on the bus */ int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val); + /** @read_c45: Perform a C45 read transfer on the bus */ + int (*read_c45)(struct mii_bus *bus, int addr, int devnum, int regnum); + /** @write_c45: Perform a C45 write transfer on the bus */ + int (*write_c45)(struct mii_bus *bus, int addr, int devnum, + int regnum, u16 val); /** @reset: Perform a reset of the bus */ int (*reset)(struct mii_bus *bus); -- cgit v1.2.3 From ce30fa56cbf09ab6b28170ced689c3cfd329e979 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 9 Jan 2023 16:30:45 +0100 Subject: net: mdio: Move mdiobus_c45_addr() next to users Now that mdiobus_c45_addr() is only used within the MDIO code during fallback, move the function next to its only users. This function should not be used any more in drivers, the c45 helpers should be used in its place, so hiding it away will prevent any new users from being added. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 1e78c8410b21..97b49765e8b5 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -459,11 +459,6 @@ static inline int mdiodev_modify_changed(struct mdio_device *mdiodev, mask, set); } -static inline u32 mdiobus_c45_addr(int devad, u16 regnum) -{ - return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum; -} - static inline u16 mdiobus_c45_regad(u32 regnum) { return FIELD_GET(MII_REGADDR_C45_MASK, regnum); -- cgit v1.2.3 From 002dd3de097c778a74ae9e47e598bea6ad055af0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 9 Jan 2023 16:30:46 +0100 Subject: net: mdio: mdio-bitbang: Separate C22 and C45 transactions The bitbbanging bus driver can perform both C22 and C45 transfers. Create separate functions for each and register the C45 versions using the new driver API calls. The SH Ethernet driver places wrappers around these functions. In order to not break boards which might be using C45, add similar wrappers for C45 operations. Reviewed-by: Geert Uytterhoeven Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Signed-off-by: Jakub Kicinski --- include/linux/mdio-bitbang.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index 373630fe5c28..cffabdbce075 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -38,8 +38,10 @@ struct mdiobb_ctrl { u8 op_c22_write; }; -int mdiobb_read(struct mii_bus *bus, int phy, int reg); -int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val); +int mdiobb_read_c22(struct mii_bus *bus, int phy, int reg); +int mdiobb_write_c22(struct mii_bus *bus, int phy, int reg, u16 val); +int mdiobb_read_c45(struct mii_bus *bus, int devad, int phy, int reg); +int mdiobb_write_c45(struct mii_bus *bus, int devad, int phy, int reg, u16 val); /* The returned bus is not yet registered with the phy layer. */ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl); -- cgit v1.2.3 From 1d914d51f03cbbbda3c34c675e49392ce60c1c33 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 9 Jan 2023 16:30:50 +0100 Subject: net: mdio: add mdiobus_c45_read/write_nested helpers Some DSA devices pass through PHY access to the MDIO bus the switch is on. Add C45 versions of the current C22 helpers for nested accesses to MDIO busses, so that C22 and C45 can be separated in these DSA drivers. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 97b49765e8b5..220f3ca8702d 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -425,10 +425,14 @@ int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum); int mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum); +int mdiobus_c45_read_nested(struct mii_bus *bus, int addr, int devad, + u32 regnum); int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, u16 val); int mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, u16 val); +int mdiobus_c45_write_nested(struct mii_bus *bus, int addr, int devad, + u32 regnum, u16 val); int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum, u16 mask, u16 set); -- cgit v1.2.3 From e656cd0bcf3d2ba2eceac82b44714bf355428ec4 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 9 Jan 2023 14:05:22 +0100 Subject: soc: qcom: rmtfs: Optionally map RMTFS to more VMs Some SoCs require that RMTFS is also mapped to the NAV VM. Trying to power on the modem without that results in the whole platform crashing and forces a hard reboot within about 2 seconds. Add support for mapping the region to additional VMs, such as NAV to open a path towards enabling modem on such platforms. Signed-off-by: Loic Poulain [Konrad: reword, make conditional and flexible, add a define for NAV VMID] Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230109130523.298971-2-konrad.dybcio@linaro.org --- include/linux/qcom_scm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index f8335644a01a..150b72edb879 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -55,6 +55,7 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_VMID_MSS_MSA 0xF #define QCOM_SCM_VMID_WLAN 0x18 #define QCOM_SCM_VMID_WLAN_CE 0x19 +#define QCOM_SCM_VMID_NAV 0x2B #define QCOM_SCM_PERM_READ 0x4 #define QCOM_SCM_PERM_WRITE 0x2 #define QCOM_SCM_PERM_EXEC 0x1 -- cgit v1.2.3 From 45ca30eb9dfe622b00ce352cf28ee141d243254b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 9 Jan 2023 14:05:23 +0100 Subject: dt-bindings: firmware: qcom: scm: Separate VMIDs from header to bindings The SCM VMIDs represent predefined mappings that come from the irreplaceable and non-omittable firmware that comes with every Qualcomm SoC (unless you steal engineering samples from the factory) and help clarify otherwise totally magic numbers which we are required to pass to the secure world for some parts of the SoC to work at all (with modem being the prime example). On top of that, with changes to the rmtfs binding, secure VMIDs will become useful to have in device trees for readability. Separate them out and add to include/dt-bindings. Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230109130523.298971-3-konrad.dybcio@linaro.org --- include/linux/qcom_scm.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 150b72edb879..1e449a5d7f5c 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -9,6 +9,8 @@ #include #include +#include + #define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) #define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 @@ -51,11 +53,6 @@ enum qcom_scm_ice_cipher { QCOM_SCM_ICE_CIPHER_AES_256_CBC = 4, }; -#define QCOM_SCM_VMID_HLOS 0x3 -#define QCOM_SCM_VMID_MSS_MSA 0xF -#define QCOM_SCM_VMID_WLAN 0x18 -#define QCOM_SCM_VMID_WLAN_CE 0x19 -#define QCOM_SCM_VMID_NAV 0x2B #define QCOM_SCM_PERM_READ 0x4 #define QCOM_SCM_PERM_WRITE 0x2 #define QCOM_SCM_PERM_EXEC 0x1 -- cgit v1.2.3 From 8d231dbc3b10155727bcfa9e543d397ad357f14f Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 28 Nov 2022 18:00:17 +0200 Subject: net/mlx5: Expose shared buffer registers bits and structs Add the shared receive buffer management and configuration registers: 1. SBPR - Shared Buffer Pools Register 2. SBCM - Shared Buffer Class Management Register Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 61 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f..0c4f6acf59ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -100,6 +100,8 @@ enum { }; enum { + MLX5_REG_SBPR = 0xb001, + MLX5_REG_SBCM = 0xb002, MLX5_REG_QPTS = 0x4002, MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a9ee7bc59c90..a84bdeeed2c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11000,6 +11000,67 @@ struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_2e0[0x80]; }; +struct mlx5_ifc_sbpr_reg_bits { + u8 desc[0x1]; + u8 snap[0x1]; + u8 reserved_at_2[0x4]; + u8 dir[0x2]; + u8 reserved_at_8[0x14]; + u8 pool[0x4]; + + u8 infi_size[0x1]; + u8 reserved_at_21[0x7]; + u8 size[0x18]; + + u8 reserved_at_40[0x1c]; + u8 mode[0x4]; + + u8 reserved_at_60[0x8]; + u8 buff_occupancy[0x18]; + + u8 clr[0x1]; + u8 reserved_at_81[0x7]; + u8 max_buff_occupancy[0x18]; + + u8 reserved_at_a0[0x8]; + u8 ext_buff_occupancy[0x18]; +}; + +struct mlx5_ifc_sbcm_reg_bits { + u8 desc[0x1]; + u8 snap[0x1]; + u8 reserved_at_2[0x6]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 pg_buff[0x6]; + u8 reserved_at_18[0x6]; + u8 dir[0x2]; + + u8 reserved_at_20[0x1f]; + u8 exc[0x1]; + + u8 reserved_at_40[0x40]; + + u8 reserved_at_80[0x8]; + u8 buff_occupancy[0x18]; + + u8 clr[0x1]; + u8 reserved_at_a1[0x7]; + u8 max_buff_occupancy[0x18]; + + u8 reserved_at_c0[0x8]; + u8 min_buff[0x18]; + + u8 infi_max[0x1]; + u8 reserved_at_e1[0x7]; + u8 max_buff[0x18]; + + u8 reserved_at_100[0x20]; + + u8 reserved_at_120[0x1c]; + u8 pool[0x4]; +}; + struct mlx5_ifc_qtct_reg_bits { u8 reserved_at_0[0x8]; u8 port_number[0x8]; -- cgit v1.2.3 From fe998a3c77b9f989a30a2a01fb00d3729a6d53a4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 29 Jun 2022 11:38:21 +0300 Subject: net/mlx5: Enable management PF initialization Enable initialization of DPU Management PF, which is a new loopback PF designed for communication with BMC. For now Management PF doesn't support nor require most upper layer protocols so avoid them. Signed-off-by: Shay Drory Reviewed-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0c4f6acf59ca..50a5780367fa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1202,6 +1202,11 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_VF; } +static inline bool mlx5_core_is_management_pf(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, num_ports) == 1 && !MLX5_CAP_GEN(dev, native_port_num); +} + static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; -- cgit v1.2.3 From 63fbae0a74c3e1df7c20c81e04353ced050d9887 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 2 Aug 2022 14:47:30 +0300 Subject: net/mlx5: Prevent high-rate FW commands from populating all slots Certain connection-based device-offload protocols (like TLS) use per-connection HW objects to track the state, maintain the context, and perform the offload properly. Some of these objects are created, modified, and destroyed via FW commands. Under high connection rate, this type of FW commands might continuously populate all slots of the FW command interface and throttle it, while starving other critical control FW commands. Limit these throttle commands to using only up to a portion (half) of the FW command interface slots. FW commands maximal rate is not hit, and the same high rate is still reached when applying this limitation. Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 50a5780367fa..7c393da396b1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -310,6 +310,7 @@ struct mlx5_cmd { struct workqueue_struct *wq; struct semaphore sem; struct semaphore pages_sem; + struct semaphore throttle_sem; int mode; u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; -- cgit v1.2.3 From 8580e16c28f3f1a1bee87de115157161577334b4 Mon Sep 17 00:00:00 2001 From: Piergiorgio Beruto Date: Mon, 9 Jan 2023 17:59:39 +0100 Subject: net/ethtool: add netlink interface for the PLCA RS Add support for configuring the PLCA Reconciliation Sublayer on multi-drop PHYs that support IEEE802.3cg-2019 Clause 148 (e.g., 10BASE-T1S). This patch adds the appropriate netlink interface to ethtool. Signed-off-by: Piergiorgio Beruto Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool.h | 12 +++++++++++ include/linux/phy.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9e0a76fc7de9..d0da303f6634 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -802,12 +802,17 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, struct phy_device; struct phy_tdr_config; +struct phy_plca_cfg; +struct phy_plca_status; /** * struct ethtool_phy_ops - Optional PHY device options * @get_sset_count: Get number of strings that @get_strings will write. * @get_strings: Return a set of strings that describe the requested objects * @get_stats: Return extended statistics about the PHY device. + * @get_plca_cfg: Return PLCA configuration. + * @set_plca_cfg: Set PLCA configuration. + * @get_plca_status: Get PLCA configuration. * @start_cable_test: Start a cable test * @start_cable_test_tdr: Start a Time Domain Reflectometry cable test * @@ -819,6 +824,13 @@ struct ethtool_phy_ops { int (*get_strings)(struct phy_device *dev, u8 *data); int (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); + int (*get_plca_cfg)(struct phy_device *dev, + struct phy_plca_cfg *plca_cfg); + int (*set_plca_cfg)(struct phy_device *dev, + const struct phy_plca_cfg *plca_cfg, + struct netlink_ext_ack *extack); + int (*get_plca_status)(struct phy_device *dev, + struct phy_plca_status *plca_st); int (*start_cable_test)(struct phy_device *phydev, struct netlink_ext_ack *extack); int (*start_cable_test_tdr)(struct phy_device *phydev, diff --git a/include/linux/phy.h b/include/linux/phy.h index 89b43cda5bda..b82fdb0d82ed 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -773,6 +773,63 @@ struct phy_tdr_config { }; #define PHY_PAIR_ALL -1 +/** + * struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision + * Avoidance) Reconciliation Sublayer. + * + * @version: read-only PLCA register map version. -1 = not available. Ignored + * when setting the configuration. Format is the same as reported by the PLCA + * IDVER register (31.CA00). -1 = not available. + * @enabled: PLCA configured mode (enabled/disabled). -1 = not available / don't + * set. 0 = disabled, anything else = enabled. + * @node_id: the PLCA local node identifier. -1 = not available / don't set. + * Allowed values [0 .. 254]. 255 = node disabled. + * @node_cnt: the PLCA node count (maximum number of nodes having a TO). Only + * meaningful for the coordinator (node_id = 0). -1 = not available / don't + * set. Allowed values [1 .. 255]. + * @to_tmr: The value of the PLCA to_timer in bit-times, which determines the + * PLCA transmit opportunity window opening. See IEEE802.3 Clause 148 for + * more details. The to_timer shall be set equal over all nodes. + * -1 = not available / don't set. Allowed values [0 .. 255]. + * @burst_cnt: controls how many additional frames a node is allowed to send in + * single transmit opportunity (TO). The default value of 0 means that the + * node is allowed exactly one frame per TO. A value of 1 allows two frames + * per TO, and so on. -1 = not available / don't set. + * Allowed values [0 .. 255]. + * @burst_tmr: controls how many bit times to wait for the MAC to send a new + * frame before interrupting the burst. This value should be set to a value + * greater than the MAC inter-packet gap (which is typically 96 bits). + * -1 = not available / don't set. Allowed values [0 .. 255]. + * + * A structure containing configuration parameters for setting/getting the PLCA + * RS configuration. The driver does not need to implement all the parameters, + * but should report what is actually used. + */ +struct phy_plca_cfg { + int version; + int enabled; + int node_id; + int node_cnt; + int to_tmr; + int burst_cnt; + int burst_tmr; +}; + +/** + * struct phy_plca_status - Status of the PLCA (Physical Layer Collision + * Avoidance) Reconciliation Sublayer. + * + * @pst: The PLCA status as reported by the PST bit in the PLCA STATUS + * register(31.CA03), indicating BEACON activity. + * + * A structure containing status information of the PLCA RS configuration. + * The driver does not need to implement all the parameters, but should report + * what is actually used. + */ +struct phy_plca_status { + bool pst; +}; + /** * struct phy_driver - Driver structure for a particular PHY type * -- cgit v1.2.3 From 16178c8ef53dc9734302c4c07633696454579ee3 Mon Sep 17 00:00:00 2001 From: Piergiorgio Beruto Date: Mon, 9 Jan 2023 17:59:58 +0100 Subject: drivers/net/phy: add the link modes for the 10BASE-T1S Ethernet PHY This patch adds the link modes for the IEEE 802.3cg Clause 147 10BASE-T1S Ethernet PHY. According to the specifications, the 10BASE-T1S supports Point-To-Point Full-Duplex, Point-To-Point Half-Duplex and/or Point-To-Multipoint (AKA Multi-Drop) Half-Duplex operations. Signed-off-by: Piergiorgio Beruto Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b82fdb0d82ed..63b199f574f7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -45,6 +45,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; @@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) +#define PHY_BASIC_T1S_P2MP_FEATURES ((unsigned long *)&phy_basic_t1s_p2mp_features) #define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) #define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) @@ -66,6 +68,7 @@ extern const int phy_fibre_port_array[1]; extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; extern const int phy_basic_t1_features_array[3]; +extern const int phy_basic_t1s_p2mp_features_array[2]; extern const int phy_gbit_features_array[2]; extern const int phy_10gbit_features_array[1]; @@ -1041,6 +1044,17 @@ struct phy_driver { int (*get_sqi)(struct phy_device *dev); /** @get_sqi_max: Get the maximum signal quality indication */ int (*get_sqi_max)(struct phy_device *dev); + + /* PLCA RS interface */ + /** @get_plca_cfg: Return the current PLCA configuration */ + int (*get_plca_cfg)(struct phy_device *dev, + struct phy_plca_cfg *plca_cfg); + /** @set_plca_cfg: Set the PLCA configuration */ + int (*set_plca_cfg)(struct phy_device *dev, + const struct phy_plca_cfg *plca_cfg); + /** @get_plca_status: Return the current PLCA status info */ + int (*get_plca_status)(struct phy_device *dev, + struct phy_plca_status *plca_st); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -- cgit v1.2.3 From a23a1e57a6770a8fad1c6362dfe73bd9f27e430c Mon Sep 17 00:00:00 2001 From: Piergiorgio Beruto Date: Mon, 9 Jan 2023 18:00:10 +0100 Subject: drivers/net/phy: add connection between ethtool and phylib for PLCA This patch adds the required connection between netlink ethtool and phylib to resolve PLCA get/set config and get status messages. Signed-off-by: Piergiorgio Beruto Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 63b199f574f7..7c2ec1650975 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1851,6 +1851,13 @@ int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data); int phy_ethtool_get_sset_count(struct phy_device *phydev); int phy_ethtool_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data); +int phy_ethtool_get_plca_cfg(struct phy_device *phydev, + struct phy_plca_cfg *plca_cfg); +int phy_ethtool_set_plca_cfg(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg, + struct netlink_ext_ack *extack); +int phy_ethtool_get_plca_status(struct phy_device *phydev, + struct phy_plca_status *plca_st); static inline int phy_package_read(struct phy_device *phydev, u32 regnum) { -- cgit v1.2.3 From 493323416fed6b1ec6128a65c00e5f01d38b7e17 Mon Sep 17 00:00:00 2001 From: Piergiorgio Beruto Date: Mon, 9 Jan 2023 18:00:23 +0100 Subject: drivers/net/phy: add helpers to get/set PLCA configuration This patch adds support in phylib to read/write PLCA configuration for Ethernet PHYs that support the OPEN Alliance "10BASE-T1S PLCA Management Registers" specifications. These can be found at https://www.opensig.org/about/specifications/ Signed-off-by: Piergiorgio Beruto Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7c2ec1650975..b3cf1e08e880 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1753,6 +1753,12 @@ int genphy_c45_loopback(struct phy_device *phydev, bool enable); int genphy_c45_pma_resume(struct phy_device *phydev); int genphy_c45_pma_suspend(struct phy_device *phydev); int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable); +int genphy_c45_plca_get_cfg(struct phy_device *phydev, + struct phy_plca_cfg *plca_cfg); +int genphy_c45_plca_set_cfg(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg); +int genphy_c45_plca_get_status(struct phy_device *phydev, + struct phy_plca_status *plca_st); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 5970e15dbcfeb0ed3a0bf1954f35bbe60a048754 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 20 Nov 2022 09:15:34 -0500 Subject: filelock: move file locking definitions to separate header file The file locking definitions have lived in fs.h since the dawn of time, but they are only used by a small subset of the source files that include it. Move the file locking definitions to a new header file, and add the appropriate #include directives to the source files that need them. By doing this we trim down fs.h a bit and limit the amount of rebuilding that has to be done when we make changes to the file locking APIs. Reviewed-by: Xiubo Li Reviewed-by: Christian Brauner (Microsoft) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Reviewed-by: Russell King (Oracle) Acked-by: Chuck Lever Acked-by: Joseph Qi Acked-by: Steve French Acked-by: Al Viro Acked-by: Darrick J. Wong Signed-off-by: Jeff Layton --- include/linux/filelock.h | 441 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 429 -------------------------------------------- include/linux/lockd/xdr.h | 1 + 3 files changed, 442 insertions(+), 429 deletions(-) create mode 100644 include/linux/filelock.h (limited to 'include/linux') diff --git a/include/linux/filelock.h b/include/linux/filelock.h new file mode 100644 index 000000000000..dc5056a66e2c --- /dev/null +++ b/include/linux/filelock.h @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FILELOCK_H +#define _LINUX_FILELOCK_H + +#include + +#define FL_POSIX 1 +#define FL_FLOCK 2 +#define FL_DELEG 4 /* NFSv4 delegation */ +#define FL_ACCESS 8 /* not trying to lock, just looking */ +#define FL_EXISTS 16 /* when unlocking, test for existence */ +#define FL_LEASE 32 /* lease held on this file */ +#define FL_CLOSE 64 /* unlock on close */ +#define FL_SLEEP 128 /* A blocking lock */ +#define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ +#define FL_UNLOCK_PENDING 512 /* Lease is being broken */ +#define FL_OFDLCK 1024 /* lock is "owned" by struct file */ +#define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ + +#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) + +/* + * Special return value from posix_lock_file() and vfs_lock_file() for + * asynchronous locking. + */ +#define FILE_LOCK_DEFERRED 1 + +struct file_lock; + +struct file_lock_operations { + void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + void (*fl_release_private)(struct file_lock *); +}; + +struct lock_manager_operations { + void *lm_mod_owner; + fl_owner_t (*lm_get_owner)(fl_owner_t); + void (*lm_put_owner)(fl_owner_t); + void (*lm_notify)(struct file_lock *); /* unblock callback */ + int (*lm_grant)(struct file_lock *, int); + bool (*lm_break)(struct file_lock *); + int (*lm_change)(struct file_lock *, int, struct list_head *); + void (*lm_setup)(struct file_lock *, void **); + bool (*lm_breaker_owns_lease)(struct file_lock *); + bool (*lm_lock_expirable)(struct file_lock *cfl); + void (*lm_expire_lock)(void); +}; + +struct lock_manager { + struct list_head list; + /* + * NFSv4 and up also want opens blocked during the grace period; + * NLM doesn't care: + */ + bool block_opens; +}; + +struct net; +void locks_start_grace(struct net *, struct lock_manager *); +void locks_end_grace(struct lock_manager *); +bool locks_in_grace(struct net *); +bool opens_in_grace(struct net *); + +/* + * struct file_lock has a union that some filesystems use to track + * their own private info. The NFS side of things is defined here: + */ +#include + +/* + * struct file_lock represents a generic "file lock". It's used to represent + * POSIX byte range locks, BSD (flock) locks, and leases. It's important to + * note that the same struct is used to represent both a request for a lock and + * the lock itself, but the same object is never used for both. + * + * FIXME: should we create a separate "struct lock_request" to help distinguish + * these two uses? + * + * The varous i_flctx lists are ordered by: + * + * 1) lock owner + * 2) lock range start + * 3) lock range end + * + * Obviously, the last two criteria only matter for POSIX locks. + */ +struct file_lock { + struct file_lock *fl_blocker; /* The lock, that is blocking us */ + struct list_head fl_list; /* link into file_lock_context */ + struct hlist_node fl_link; /* node in global lists */ + struct list_head fl_blocked_requests; /* list of requests with + * ->fl_blocker pointing here + */ + struct list_head fl_blocked_member; /* node in + * ->fl_blocker->fl_blocked_requests + */ + fl_owner_t fl_owner; + unsigned int fl_flags; + unsigned char fl_type; + unsigned int fl_pid; + int fl_link_cpu; /* what cpu's list is this on? */ + wait_queue_head_t fl_wait; + struct file *fl_file; + loff_t fl_start; + loff_t fl_end; + + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ + unsigned long fl_break_time; + unsigned long fl_downgrade_time; + + const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ + const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ + union { + struct nfs_lock_info nfs_fl; + struct nfs4_lock_info nfs4_fl; + struct { + struct list_head link; /* link in AFS vnode's pending_locks list */ + int state; /* state of grant or error if -ve */ + unsigned int debug_id; + } afs; + struct { + struct inode *inode; + } ceph; + } fl_u; +} __randomize_layout; + +struct file_lock_context { + spinlock_t flc_lock; + struct list_head flc_flock; + struct list_head flc_posix; + struct list_head flc_lease; +}; + +#define locks_inode(f) file_inode(f) + +#ifdef CONFIG_FILE_LOCKING +int fcntl_getlk(struct file *, unsigned int, struct flock *); +int fcntl_setlk(unsigned int, struct file *, unsigned int, + struct flock *); + +#if BITS_PER_LONG == 32 +int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); +int fcntl_setlk64(unsigned int, struct file *, unsigned int, + struct flock64 *); +#endif + +int fcntl_setlease(unsigned int fd, struct file *filp, long arg); +int fcntl_getlease(struct file *filp); + +/* fs/locks.c */ +void locks_free_lock_context(struct inode *inode); +void locks_free_lock(struct file_lock *fl); +void locks_init_lock(struct file_lock *); +struct file_lock * locks_alloc_lock(void); +void locks_copy_lock(struct file_lock *, struct file_lock *); +void locks_copy_conflock(struct file_lock *, struct file_lock *); +void locks_remove_posix(struct file *, fl_owner_t); +void locks_remove_file(struct file *); +void locks_release_private(struct file_lock *); +void posix_test_lock(struct file *, struct file_lock *); +int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); +int locks_delete_block(struct file_lock *); +int vfs_test_lock(struct file *, struct file_lock *); +int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); +int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +bool vfs_inode_has_locks(struct inode *inode); +int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); +int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); +void lease_get_mtime(struct inode *, struct timespec64 *time); +int generic_setlease(struct file *, long, struct file_lock **, void **priv); +int vfs_setlease(struct file *, long, struct file_lock **, void **); +int lease_modify(struct file_lock *, int, struct list_head *); + +struct notifier_block; +int lease_register_notifier(struct notifier_block *); +void lease_unregister_notifier(struct notifier_block *); + +struct files_struct; +void show_fd_locks(struct seq_file *f, + struct file *filp, struct files_struct *files); +bool locks_owner_has_blockers(struct file_lock_context *flctx, + fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + +#else /* !CONFIG_FILE_LOCKING */ +static inline int fcntl_getlk(struct file *file, unsigned int cmd, + struct flock __user *user) +{ + return -EINVAL; +} + +static inline int fcntl_setlk(unsigned int fd, struct file *file, + unsigned int cmd, struct flock __user *user) +{ + return -EACCES; +} + +#if BITS_PER_LONG == 32 +static inline int fcntl_getlk64(struct file *file, unsigned int cmd, + struct flock64 *user) +{ + return -EINVAL; +} + +static inline int fcntl_setlk64(unsigned int fd, struct file *file, + unsigned int cmd, struct flock64 *user) +{ + return -EACCES; +} +#endif +static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) +{ + return -EINVAL; +} + +static inline int fcntl_getlease(struct file *filp) +{ + return F_UNLCK; +} + +static inline void +locks_free_lock_context(struct inode *inode) +{ +} + +static inline void locks_init_lock(struct file_lock *fl) +{ + return; +} + +static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) +{ + return; +} + +static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + return; +} + +static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) +{ + return; +} + +static inline void locks_remove_file(struct file *filp) +{ + return; +} + +static inline void posix_test_lock(struct file *filp, struct file_lock *fl) +{ + return; +} + +static inline int posix_lock_file(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) +{ + return -ENOLCK; +} + +static inline int locks_delete_block(struct file_lock *waiter) +{ + return -ENOENT; +} + +static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) +{ + return 0; +} + +static inline int vfs_lock_file(struct file *filp, unsigned int cmd, + struct file_lock *fl, struct file_lock *conf) +{ + return -ENOLCK; +} + +static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) +{ + return 0; +} + +static inline bool vfs_inode_has_locks(struct inode *inode) +{ + return false; +} + +static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) +{ + return -ENOLCK; +} + +static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) +{ + return 0; +} + +static inline void lease_get_mtime(struct inode *inode, + struct timespec64 *time) +{ + return; +} + +static inline int generic_setlease(struct file *filp, long arg, + struct file_lock **flp, void **priv) +{ + return -EINVAL; +} + +static inline int vfs_setlease(struct file *filp, long arg, + struct file_lock **lease, void **priv) +{ + return -EINVAL; +} + +static inline int lease_modify(struct file_lock *fl, int arg, + struct list_head *dispose) +{ + return -EINVAL; +} + +struct files_struct; +static inline void show_fd_locks(struct seq_file *f, + struct file *filp, struct files_struct *files) {} +static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, + fl_owner_t owner) +{ + return false; +} + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + +#endif /* !CONFIG_FILE_LOCKING */ + +static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return locks_lock_inode_wait(locks_inode(filp), fl); +} + +#ifdef CONFIG_FILE_LOCKING +static inline int break_lease(struct inode *inode, unsigned int mode) +{ + /* + * Since this check is lockless, we must ensure that any refcounts + * taken are done before checking i_flctx->flc_lease. Otherwise, we + * could end up racing with tasks trying to set a new lease on this + * file. + */ + smp_mb(); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + return __break_lease(inode, mode, FL_LEASE); + return 0; +} + +static inline int break_deleg(struct inode *inode, unsigned int mode) +{ + /* + * Since this check is lockless, we must ensure that any refcounts + * taken are done before checking i_flctx->flc_lease. Otherwise, we + * could end up racing with tasks trying to set a new lease on this + * file. + */ + smp_mb(); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + return __break_lease(inode, mode, FL_DELEG); + return 0; +} + +static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +{ + int ret; + + ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); + if (ret == -EWOULDBLOCK && delegated_inode) { + *delegated_inode = inode; + ihold(inode); + } + return ret; +} + +static inline int break_deleg_wait(struct inode **delegated_inode) +{ + int ret; + + ret = break_deleg(*delegated_inode, O_WRONLY); + iput(*delegated_inode); + *delegated_inode = NULL; + return ret; +} + +static inline int break_layout(struct inode *inode, bool wait) +{ + smp_mb(); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + return __break_lease(inode, + wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, + FL_LAYOUT); + return 0; +} + +#else /* !CONFIG_FILE_LOCKING */ +static inline int break_lease(struct inode *inode, unsigned int mode) +{ + return 0; +} + +static inline int break_deleg(struct inode *inode, unsigned int mode) +{ + return 0; +} + +static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +{ + return 0; +} + +static inline int break_deleg_wait(struct inode **delegated_inode) +{ + BUG(); + return 0; +} + +static inline int break_layout(struct inode *inode, bool wait) +{ + return 0; +} + +#endif /* CONFIG_FILE_LOCKING */ + +#endif /* _LINUX_FILELOCK_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..006278d84d2f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1003,135 +1003,11 @@ static inline struct file *get_file(struct file *f) #define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif -#define FL_POSIX 1 -#define FL_FLOCK 2 -#define FL_DELEG 4 /* NFSv4 delegation */ -#define FL_ACCESS 8 /* not trying to lock, just looking */ -#define FL_EXISTS 16 /* when unlocking, test for existence */ -#define FL_LEASE 32 /* lease held on this file */ -#define FL_CLOSE 64 /* unlock on close */ -#define FL_SLEEP 128 /* A blocking lock */ -#define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ -#define FL_UNLOCK_PENDING 512 /* Lease is being broken */ -#define FL_OFDLCK 1024 /* lock is "owned" by struct file */ -#define FL_LAYOUT 2048 /* outstanding pNFS layout */ -#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ - -#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) - -/* - * Special return value from posix_lock_file() and vfs_lock_file() for - * asynchronous locking. - */ -#define FILE_LOCK_DEFERRED 1 - /* legacy typedef, should eventually be removed */ typedef void *fl_owner_t; struct file_lock; -struct file_lock_operations { - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); - void (*fl_release_private)(struct file_lock *); -}; - -struct lock_manager_operations { - void *lm_mod_owner; - fl_owner_t (*lm_get_owner)(fl_owner_t); - void (*lm_put_owner)(fl_owner_t); - void (*lm_notify)(struct file_lock *); /* unblock callback */ - int (*lm_grant)(struct file_lock *, int); - bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock *, int, struct list_head *); - void (*lm_setup)(struct file_lock *, void **); - bool (*lm_breaker_owns_lease)(struct file_lock *); - bool (*lm_lock_expirable)(struct file_lock *cfl); - void (*lm_expire_lock)(void); -}; - -struct lock_manager { - struct list_head list; - /* - * NFSv4 and up also want opens blocked during the grace period; - * NLM doesn't care: - */ - bool block_opens; -}; - -struct net; -void locks_start_grace(struct net *, struct lock_manager *); -void locks_end_grace(struct lock_manager *); -bool locks_in_grace(struct net *); -bool opens_in_grace(struct net *); - -/* that will die - we need it for nfs_lock_info */ -#include - -/* - * struct file_lock represents a generic "file lock". It's used to represent - * POSIX byte range locks, BSD (flock) locks, and leases. It's important to - * note that the same struct is used to represent both a request for a lock and - * the lock itself, but the same object is never used for both. - * - * FIXME: should we create a separate "struct lock_request" to help distinguish - * these two uses? - * - * The varous i_flctx lists are ordered by: - * - * 1) lock owner - * 2) lock range start - * 3) lock range end - * - * Obviously, the last two criteria only matter for POSIX locks. - */ -struct file_lock { - struct file_lock *fl_blocker; /* The lock, that is blocking us */ - struct list_head fl_list; /* link into file_lock_context */ - struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_blocked_requests; /* list of requests with - * ->fl_blocker pointing here - */ - struct list_head fl_blocked_member; /* node in - * ->fl_blocker->fl_blocked_requests - */ - fl_owner_t fl_owner; - unsigned int fl_flags; - unsigned char fl_type; - unsigned int fl_pid; - int fl_link_cpu; /* what cpu's list is this on? */ - wait_queue_head_t fl_wait; - struct file *fl_file; - loff_t fl_start; - loff_t fl_end; - - struct fasync_struct * fl_fasync; /* for lease break notifications */ - /* for lease breaks: */ - unsigned long fl_break_time; - unsigned long fl_downgrade_time; - - const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ - const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ - union { - struct nfs_lock_info nfs_fl; - struct nfs4_lock_info nfs4_fl; - struct { - struct list_head link; /* link in AFS vnode's pending_locks list */ - int state; /* state of grant or error if -ve */ - unsigned int debug_id; - } afs; - struct { - struct inode *inode; - } ceph; - } fl_u; -} __randomize_layout; - -struct file_lock_context { - spinlock_t flc_lock; - struct list_head flc_flock; - struct list_head flc_posix; - struct list_head flc_lease; -}; - /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX #define OFFSET_MAX type_max(loff_t) @@ -1140,216 +1016,6 @@ struct file_lock_context { extern void send_sigio(struct fown_struct *fown, int fd, int band); -#define locks_inode(f) file_inode(f) - -#ifdef CONFIG_FILE_LOCKING -extern int fcntl_getlk(struct file *, unsigned int, struct flock *); -extern int fcntl_setlk(unsigned int, struct file *, unsigned int, - struct flock *); - -#if BITS_PER_LONG == 32 -extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); -extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, - struct flock64 *); -#endif - -extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); -extern int fcntl_getlease(struct file *filp); - -/* fs/locks.c */ -void locks_free_lock_context(struct inode *inode); -void locks_free_lock(struct file_lock *fl); -extern void locks_init_lock(struct file_lock *); -extern struct file_lock * locks_alloc_lock(void); -extern void locks_copy_lock(struct file_lock *, struct file_lock *); -extern void locks_copy_conflock(struct file_lock *, struct file_lock *); -extern void locks_remove_posix(struct file *, fl_owner_t); -extern void locks_remove_file(struct file *); -extern void locks_release_private(struct file_lock *); -extern void posix_test_lock(struct file *, struct file_lock *); -extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int locks_delete_block(struct file_lock *); -extern int vfs_test_lock(struct file *, struct file_lock *); -extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); -extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); -bool vfs_inode_has_locks(struct inode *inode); -extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); -extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); -extern void lease_get_mtime(struct inode *, struct timespec64 *time); -extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); -extern int vfs_setlease(struct file *, long, struct file_lock **, void **); -extern int lease_modify(struct file_lock *, int, struct list_head *); - -struct notifier_block; -extern int lease_register_notifier(struct notifier_block *); -extern void lease_unregister_notifier(struct notifier_block *); - -struct files_struct; -extern void show_fd_locks(struct seq_file *f, - struct file *filp, struct files_struct *files); -extern bool locks_owner_has_blockers(struct file_lock_context *flctx, - fl_owner_t owner); - -static inline struct file_lock_context * -locks_inode_context(const struct inode *inode) -{ - return smp_load_acquire(&inode->i_flctx); -} - -#else /* !CONFIG_FILE_LOCKING */ -static inline int fcntl_getlk(struct file *file, unsigned int cmd, - struct flock __user *user) -{ - return -EINVAL; -} - -static inline int fcntl_setlk(unsigned int fd, struct file *file, - unsigned int cmd, struct flock __user *user) -{ - return -EACCES; -} - -#if BITS_PER_LONG == 32 -static inline int fcntl_getlk64(struct file *file, unsigned int cmd, - struct flock64 *user) -{ - return -EINVAL; -} - -static inline int fcntl_setlk64(unsigned int fd, struct file *file, - unsigned int cmd, struct flock64 *user) -{ - return -EACCES; -} -#endif -static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) -{ - return -EINVAL; -} - -static inline int fcntl_getlease(struct file *filp) -{ - return F_UNLCK; -} - -static inline void -locks_free_lock_context(struct inode *inode) -{ -} - -static inline void locks_init_lock(struct file_lock *fl) -{ - return; -} - -static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) -{ - return; -} - -static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) -{ - return; -} - -static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) -{ - return; -} - -static inline void locks_remove_file(struct file *filp) -{ - return; -} - -static inline void posix_test_lock(struct file *filp, struct file_lock *fl) -{ - return; -} - -static inline int posix_lock_file(struct file *filp, struct file_lock *fl, - struct file_lock *conflock) -{ - return -ENOLCK; -} - -static inline int locks_delete_block(struct file_lock *waiter) -{ - return -ENOENT; -} - -static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) -{ - return 0; -} - -static inline int vfs_lock_file(struct file *filp, unsigned int cmd, - struct file_lock *fl, struct file_lock *conf) -{ - return -ENOLCK; -} - -static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) -{ - return 0; -} - -static inline bool vfs_inode_has_locks(struct inode *inode) -{ - return false; -} - -static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) -{ - return -ENOLCK; -} - -static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) -{ - return 0; -} - -static inline void lease_get_mtime(struct inode *inode, - struct timespec64 *time) -{ - return; -} - -static inline int generic_setlease(struct file *filp, long arg, - struct file_lock **flp, void **priv) -{ - return -EINVAL; -} - -static inline int vfs_setlease(struct file *filp, long arg, - struct file_lock **lease, void **priv) -{ - return -EINVAL; -} - -static inline int lease_modify(struct file_lock *fl, int arg, - struct list_head *dispose) -{ - return -EINVAL; -} - -struct files_struct; -static inline void show_fd_locks(struct seq_file *f, - struct file *filp, struct files_struct *files) {} -static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, - fl_owner_t owner) -{ - return false; -} - -static inline struct file_lock_context * -locks_inode_context(const struct inode *inode) -{ - return NULL; -} - -#endif /* !CONFIG_FILE_LOCKING */ - static inline struct inode *file_inode(const struct file *f) { return f->f_inode; @@ -1360,11 +1026,6 @@ static inline struct dentry *file_dentry(const struct file *file) return d_real(file->f_path.dentry, file_inode(file)); } -static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return locks_lock_inode_wait(locks_inode(filp), fl); -} - struct fasync_struct { rwlock_t fa_lock; int magic; @@ -2624,96 +2285,6 @@ extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_MASK) -#ifdef CONFIG_FILE_LOCKING -static inline int break_lease(struct inode *inode, unsigned int mode) -{ - /* - * Since this check is lockless, we must ensure that any refcounts - * taken are done before checking i_flctx->flc_lease. Otherwise, we - * could end up racing with tasks trying to set a new lease on this - * file. - */ - smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) - return __break_lease(inode, mode, FL_LEASE); - return 0; -} - -static inline int break_deleg(struct inode *inode, unsigned int mode) -{ - /* - * Since this check is lockless, we must ensure that any refcounts - * taken are done before checking i_flctx->flc_lease. Otherwise, we - * could end up racing with tasks trying to set a new lease on this - * file. - */ - smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) - return __break_lease(inode, mode, FL_DELEG); - return 0; -} - -static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) -{ - int ret; - - ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); - if (ret == -EWOULDBLOCK && delegated_inode) { - *delegated_inode = inode; - ihold(inode); - } - return ret; -} - -static inline int break_deleg_wait(struct inode **delegated_inode) -{ - int ret; - - ret = break_deleg(*delegated_inode, O_WRONLY); - iput(*delegated_inode); - *delegated_inode = NULL; - return ret; -} - -static inline int break_layout(struct inode *inode, bool wait) -{ - smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) - return __break_lease(inode, - wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, - FL_LAYOUT); - return 0; -} - -#else /* !CONFIG_FILE_LOCKING */ -static inline int break_lease(struct inode *inode, unsigned int mode) -{ - return 0; -} - -static inline int break_deleg(struct inode *inode, unsigned int mode) -{ - return 0; -} - -static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) -{ - return 0; -} - -static inline int break_deleg_wait(struct inode **delegated_inode) -{ - BUG(); - return 0; -} - -static inline int break_layout(struct inode *inode, bool wait) -{ - return 0; -} - -#endif /* CONFIG_FILE_LOCKING */ - /* fs/open.c */ struct audit_names; struct filename { diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 67e4a2c5500b..b60fbcd8cdfa 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -11,6 +11,7 @@ #define LOCKD_XDR_H #include +#include #include #include -- cgit v1.2.3 From c65454a947263dfdf482076388aaed60af84ca2f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Nov 2022 08:48:37 -0500 Subject: fs: remove locks_inode locks_inode was turned into a wrapper around file_inode in de2a4a501e71 (Partially revert "locks: fix file locking on overlayfs"). Finish replacing locks_inode invocations everywhere with file_inode. Acked-by: Miklos Szeredi Acked-by: Al Viro Reviewed-by: David Howells Signed-off-by: Jeff Layton --- include/linux/filelock.h | 4 +--- include/linux/lockd/lockd.h | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index dc5056a66e2c..efcdd1631d9b 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -133,8 +133,6 @@ struct file_lock_context { struct list_head flc_lease; }; -#define locks_inode(f) file_inode(f) - #ifdef CONFIG_FILE_LOCKING int fcntl_getlk(struct file *, unsigned int, struct flock *); int fcntl_setlk(unsigned int, struct file *, unsigned int, @@ -345,7 +343,7 @@ locks_inode_context(const struct inode *inode) static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { - return locks_lock_inode_wait(locks_inode(filp), fl); + return locks_lock_inode_wait(file_inode(filp), fl); } #ifdef CONFIG_FILE_LOCKING diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 70ce419e2709..2b7f067af3c4 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -312,7 +312,7 @@ static inline struct file *nlmsvc_file_file(struct nlm_file *file) static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return locks_inode(nlmsvc_file_file(file)); + return file_inode(nlmsvc_file_file(file)); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) @@ -372,7 +372,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file) + return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start -- cgit v1.2.3 From 2364b406824f1f42026d87c1e26d4dd1ca0f65af Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 9 Jan 2023 11:13:53 +0106 Subject: printk: move size limit macros into internal.h The size limit macros are located further down in printk.c and behind ifdef conditionals. This complicates their usage for upcoming changes. Move the macros into internal.h so that they are still invisible outside of printk, but easily accessible for printk. Also, the maximum size of formatted extended messages does not need to be known by any code outside of printk, so move it to internal.h as well. And like CONSOLE_LOG_MAX, for !CONFIG_PRINTK set CONSOLE_EXT_LOG_MAX to 0 to reduce the static memory footprint. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230109100800.1085541-2-john.ogness@linutronix.de --- include/linux/printk.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 8c81806c2e99..8ef499ab3c1e 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -44,8 +44,6 @@ static inline const char *printk_skip_headers(const char *buffer) return buffer; } -#define CONSOLE_EXT_LOG_MAX 8192 - /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT -- cgit v1.2.3 From 717a5651b10940ec827fe07acfb956d906250b2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2023 11:13:54 +0106 Subject: console: Use BIT() macros for @flags values Rather than manually calculating powers of 2, use the BIT() macros. Also take this opportunatity to cleanup and restructure the value comments into proper kerneldoc comments. Signed-off-by: Thomas Gleixner Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230109100800.1085541-3-john.ogness@linutronix.de --- include/linux/console.h | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9cea254b34b8..ed804dd7c2e8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -15,6 +15,7 @@ #define _LINUX_CONSOLE_H_ 1 #include +#include #include #include @@ -125,18 +126,43 @@ static inline int con_debug_leave(void) /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) - * - * If a console driver is marked CON_BOOT then it will be auto-unregistered - * when the first real console is registered. This is for early-printk drivers. */ -#define CON_PRINTBUFFER (1) -#define CON_CONSDEV (2) /* Preferred console, /dev/console */ -#define CON_ENABLED (4) -#define CON_BOOT (8) -#define CON_ANYTIME (16) /* Safe to call when cpu is offline */ -#define CON_BRL (32) /* Used for a braille device */ -#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ +/** + * cons_flags - General console flags + * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate + * output of messages that were already shown by boot + * consoles or read by userspace via syslog() syscall. + * @CON_CONSDEV: Indicates that the console driver is backing + * /dev/console. + * @CON_ENABLED: Indicates if a console is allowed to print records. If + * false, the console also will not advance to later + * records. + * @CON_BOOT: Marks the console driver as early console driver which + * is used during boot before the real driver becomes + * available. It will be automatically unregistered + * when the real console driver is registered unless + * "keep_bootcon" parameter is used. + * @CON_ANYTIME: A misnomed historical flag which tells the core code + * that the legacy @console::write callback can be invoked + * on a CPU which is marked OFFLINE. That is misleading as + * it suggests that there is no contextual limit for + * invoking the callback. The original motivation was + * readiness of the per-CPU areas. + * @CON_BRL: Indicates a braille device which is exempt from + * receiving the printk spam for obvious reasons. + * @CON_EXTENDED: The console supports the extended output format of + * /dev/kmesg which requires a larger output buffer. + */ +enum cons_flags { + CON_PRINTBUFFER = BIT(0), + CON_CONSDEV = BIT(1), + CON_ENABLED = BIT(2), + CON_BOOT = BIT(3), + CON_ANYTIME = BIT(4), + CON_BRL = BIT(5), + CON_EXTENDED = BIT(6), +}; struct console { char name[16]; -- cgit v1.2.3 From 02b2396d7d0cf806e80c887b4c799d482d6977ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2023 11:13:55 +0106 Subject: console: Document struct console Add kerneldoc comments to struct console. Signed-off-by: Thomas Gleixner Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230109100800.1085541-4-john.ogness@linutronix.de --- include/linux/console.h | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index ed804dd7c2e8..1e36958aa656 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -164,24 +164,44 @@ enum cons_flags { CON_EXTENDED = BIT(6), }; +/** + * struct console - The console descriptor structure + * @name: The name of the console driver + * @write: Write callback to output messages (Optional) + * @read: Read callback for console input (Optional) + * @device: The underlying TTY device driver (Optional) + * @unblank: Callback to unblank the console (Optional) + * @setup: Callback for initializing the console (Optional) + * @exit: Callback for teardown of the console (Optional) + * @match: Callback for matching a console (Optional) + * @flags: Console flags. See enum cons_flags + * @index: Console index, e.g. port number + * @cflag: TTY control mode flags + * @ispeed: TTY input speed + * @ospeed: TTY output speed + * @seq: Sequence number of the next ringbuffer record to print + * @dropped: Number of unreported dropped ringbuffer records + * @data: Driver private data + * @node: hlist node for the console list + */ struct console { - char name[16]; - void (*write)(struct console *, const char *, unsigned); - int (*read)(struct console *, char *, unsigned); - struct tty_driver *(*device)(struct console *, int *); - void (*unblank)(void); - int (*setup)(struct console *, char *); - int (*exit)(struct console *); - int (*match)(struct console *, char *name, int idx, char *options); - short flags; - short index; - int cflag; - uint ispeed; - uint ospeed; - u64 seq; - unsigned long dropped; - void *data; - struct hlist_node node; + char name[16]; + void (*write)(struct console *co, const char *s, unsigned int count); + int (*read)(struct console *co, char *s, unsigned int count); + struct tty_driver *(*device)(struct console *co, int *index); + void (*unblank)(void); + int (*setup)(struct console *co, char *options); + int (*exit)(struct console *co); + int (*match)(struct console *co, char *name, int idx, char *options); + short flags; + short index; + int cflag; + uint ispeed; + uint ospeed; + u64 seq; + unsigned long dropped; + void *data; + struct hlist_node node; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 504fa212d7030fb1c042290dc2eb92b21515573a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 28 Dec 2022 00:21:52 +0100 Subject: driver core: Make driver_deferred_probe_timeout a static variable It is not used outside of its compilation unit, so there's no need to export this variable. Signed-off-by: Javier Martinez Canillas Reviewed-by: Andrew Halaney Acked-by: John Stultz Link: https://lore.kernel.org/r/20221227232152.3094584-1-javierm@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 2114d65b862f..50d0a416a5e7 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -240,7 +240,6 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) } #endif -extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); -- cgit v1.2.3 From 17cde5e601b165174e8a433b550f84f362731164 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 17:06:42 -0400 Subject: genirq/msi: Add msi_device_has_isolated_msi() This will replace irq_domain_check_msi_remap() in following patches. The new API makes it more clear what "msi_remap" actually means from a functional perspective instead of identifying an implementation specific HW feature. Isolated MSI means that HW modeled by an irq_domain on the path from the initiating device to the CPU will validate that the MSI message specifies an interrupt number that the device is authorized to trigger. This must block devices from triggering interrupts they are not authorized to trigger. Currently authorization means the MSI vector is one assigned to the device. This is interesting for securing VFIO use cases where a rouge MSI (eg created by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to impact outside its security domain, eg userspace triggering interrupts on kernel drivers, a VM triggering interrupts on the hypervisor, or a VM triggering interrupts on another VM. As this is actually modeled as a per-irq_domain property, not a global platform property, correct the interface to accept the device parameter and scan through only the part of the irq_domains hierarchy originating from the source device. Locate the new code in msi.c as it naturally only works with CONFIG_GENERIC_MSI_IRQ, which also requires CONFIG_IRQ_DOMAIN and IRQ_DOMAIN_HIERARCHY. Link: https://lore.kernel.org/r/1-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/msi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index a112b913fff9..e8a3f3a8a7f4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -649,6 +649,19 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); + +bool msi_device_has_isolated_msi(struct device *dev); +#else /* CONFIG_GENERIC_MSI_IRQ */ +static inline bool msi_device_has_isolated_msi(struct device *dev) +{ + /* + * Arguably if the platform does not enable MSI support then it has + * "isolated MSI", as an interrupt controller that cannot receive MSIs + * is inherently isolated by our definition. As nobody seems to needs + * this be conservative and return false anyhow. + */ + return false; +} #endif /* CONFIG_GENERIC_MSI_IRQ */ /* PCI specific interfaces */ -- cgit v1.2.3 From efc30a8f15a7981737297f9e9c62fb814d74e268 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 9 Dec 2022 13:23:08 -0400 Subject: iommu: Add iommu_group_has_isolated_msi() Compute the isolated_msi over all the devices in the IOMMU group because iommufd and vfio both need to know that the entire group is isolated before granting access to it. Link: https://lore.kernel.org/r/2-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..9b7a9fa5ad28 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -455,6 +455,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); +extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); -- cgit v1.2.3 From a5e72a6bac14181249ffd04f35f6a7c9bf47fbb9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 8 Dec 2022 10:51:02 -0400 Subject: genirq/irqdomain: Remove unused irq_domain_check_msi_remap() code After converting the users of irq_domain_check_msi_remap() it and the helpers are no longer needed. The new version does not require all the #ifdef helpers and inlines because CONFIG_GENERIC_MSI_IRQ always requires CONFIG_IRQ_DOMAIN and IRQ_DOMAIN_HIERARCHY. Link: https://lore.kernel.org/r/5-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/irqdomain.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a372086750ca..b04ce03d3bb6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -276,7 +276,6 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -extern bool irq_domain_check_msi_remap(void); extern void irq_set_default_host(struct irq_domain *host); extern struct irq_domain *irq_get_default_host(void); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, @@ -559,13 +558,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain) return domain->flags & IRQ_DOMAIN_FLAG_MSI; } -static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) -{ - return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP; -} - -extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); - static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; @@ -611,17 +603,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain) return false; } -static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) -{ - return false; -} - -static inline bool -irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) -{ - return false; -} - static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return false; @@ -641,10 +622,6 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } -static inline bool irq_domain_check_msi_remap(void) -{ - return false; -} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ -- cgit v1.2.3 From dcb83f6ec1bf08a44b3f19719b56e8dc18058ff5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:12:43 -0400 Subject: genirq/msi: Rename IRQ_DOMAIN_MSI_REMAP to IRQ_DOMAIN_ISOLATED_MSI What x86 calls "interrupt remapping" is one way to achieve isolated MSI, make it clear this is talking about isolated MSI, no matter how it is achieved. This matches the new driver facing API name of msi_device_has_isolated_msi() No functional change. Link: https://lore.kernel.org/r/6-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/irqdomain.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b04ce03d3bb6..0a3e974b7288 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,8 +192,10 @@ enum { /* Irq domain implements MSIs */ IRQ_DOMAIN_FLAG_MSI = (1 << 4), - /* Irq domain implements MSI remapping */ - IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* + * Irq domain implements isolated MSI, see msi_device_has_isolated_msi() + */ + IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5), /* Irq domain doesn't translate anything */ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), -- cgit v1.2.3 From bf210f793937a634bae6eda6a6d699c00b2b53d9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:31:57 -0400 Subject: irq/s390: Add arch_is_isolated_msi() for s390 s390 doesn't use irq_domains, so it has no place to set IRQ_DOMAIN_FLAG_ISOLATED_MSI. Instead of continuing to abuse the iommu subsystem to convey this information add a simple define which s390 can make statically true. The define will cause msi_device_has_isolated() to return true. Remove IOMMU_CAP_INTR_REMAP from the s390 iommu driver. Link: https://lore.kernel.org/r/8-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Reviewed-by: Matthew Rosato Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/linux/msi.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index e8a3f3a8a7f4..13c9b74a4575 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data { } __attribute__ ((packed)) arch_msi_msg_data_t; #endif +#ifndef arch_is_isolated_msi +#define arch_is_isolated_msi() false +#endif + /** * msi_msg - Representation of a MSI message * @address_lo: Low 32 bits of msi message address @@ -657,10 +661,10 @@ static inline bool msi_device_has_isolated_msi(struct device *dev) /* * Arguably if the platform does not enable MSI support then it has * "isolated MSI", as an interrupt controller that cannot receive MSIs - * is inherently isolated by our definition. As nobody seems to needs - * this be conservative and return false anyhow. + * is inherently isolated by our definition. The default definition for + * arch_is_isolated_msi() is conservative and returns false anyhow. */ - return false; + return arch_is_isolated_msi(); } #endif /* CONFIG_GENERIC_MSI_IRQ */ -- cgit v1.2.3 From b062007c63eb4452f1122384e86d402531fb1d52 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:34:32 -0400 Subject: iommu: Remove IOMMU_CAP_INTR_REMAP No iommu driver implements this any more, get rid of it. Link: https://lore.kernel.org/r/9-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9b7a9fa5ad28..933cc57bfc48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain) enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */ - IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for DMA protection and we should too */ -- cgit v1.2.3 From a1193de562f54c7c9f60ca9f2db96e50a7608de1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 4 Jan 2023 16:02:40 -0800 Subject: mm: fix vma->anon_name memory leak for anonymous shmem VMAs free_anon_vma_name() is missing a check for anonymous shmem VMA which leads to a memory leak due to refcount not being dropped. Fix this by calling anon_vma_name_put() unconditionally. It will free vma->anon_name whenever it's non-NULL. Link: https://lkml.kernel.org/r/20230105000241.1450843-1-surenb@google.com Fixes: d09e8ca6cb93 ("mm: anonymous shared memory naming") Signed-off-by: Suren Baghdasaryan Suggested-by: David Hildenbrand Reviewed-by: David Hildenbrand Reported-by: syzbot+91edf9178386a07d06a7@syzkaller.appspotmail.com Cc: Hugh Dickins Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e8ed225d8f7c..ff3f3f23f649 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -413,8 +413,7 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) * Not using anon_vma_name because it generates a warning if mmap_lock * is not held, which might be the case here. */ - if (!vma->vm_file) - anon_vma_name_put(vma->anon_name); + anon_vma_name_put(vma->anon_name); } static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, -- cgit v1.2.3 From 0411d6ee50e3b74a793848e4f41f45860163f5cc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 6 Jan 2023 20:33:31 +0000 Subject: include/linux/mm: fix release_pages_arg kernel doc comment Commit 449c796768c9 ("mm: teach release_pages() to take an array of encoded page pointers too") added the kernel doc comment for release_pages() on top of 'union release_pages_arg', so making 'make htmldocs' complains as below: ./include/linux/mm.h:1268: warning: cannot understand function prototype: 'typedef union ' The kernel doc comment for the function is already on top of the function's definition in mm/swap.c, and the new comment is actually not for the function but indeed release_pages_arg. Fixing the comment to reflect the intent would be one option. But, kernel doc cannot parse the union as below due to the attribute. ./include/linux/mm.h:1272: error: Cannot parse struct or union! Modify the comment to reflect the intent but do not mark it as a kernel doc comment. Link: https://lkml.kernel.org/r/20230106203331.127532-1-sj@kernel.org Fixes: 449c796768c9 ("mm: teach release_pages() to take an array of encoded page pointers too") Signed-off-by: SeongJae Park Acked-by: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f3f196e4d66d..8f857163ac89 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1270,10 +1270,10 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } -/** - * release_pages - release an array of pages or folios +/* + * union release_pages_arg - an array of pages or folios * - * This just releases a simple array of multiple pages, and + * release_pages() releases a simple array of multiple pages, and * accepts various different forms of said page array: either * a regular old boring array of pages, an array of folios, or * an array of encoded page pointers. -- cgit v1.2.3 From 8651a137e62ebfde3df95cbb1ca055d013ec5b9e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 7 Jan 2023 00:00:05 +0000 Subject: mm: update mmap_sem comments to refer to mmap_lock The rename from mm->mmap_sem to mm->mmap_lock was performed in commit da1c55f1b272 ("mmap locking API: rename mmap_sem to mmap_lock") and commit c1e8d7c6a7a6 ("map locking API: convert mmap_sem comments"), however some incorrect comments remain. This patch simply corrects those comments which are obviously incorrect within mm itself. Link: https://lkml.kernel.org/r/33fba04389ab63fc4980e7ba5442f521df6dc657.1673048927.git.lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- include/linux/page_ref.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3b8475007734..9757067c3053 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -581,7 +581,7 @@ struct vm_area_struct { /* * For private and shared anonymous mappings, a pointer to a null * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_sem. Use anon_vma_name to access. + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. */ struct anon_vma_name *anon_name; #endif diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 2e677e6ad09f..d7c2d33baa7f 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -301,7 +301,7 @@ static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) * * You can also use this function if you're holding a lock that prevents * pages being frozen & removed; eg the i_pages lock for the page cache - * or the mmap_sem or page table lock for page tables. In this case, + * or the mmap_lock or page table lock for page tables. In this case, * it will always succeed, and you could have used a plain folio_get(), * but it's sometimes more convenient to have a common function called * from both locked and RCU-protected contexts. -- cgit v1.2.3 From 0d7bb85e941327064c1f33128af563fac6cb9be3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 15:38:56 +0200 Subject: ARM: omap1: remove unused board files All board support that was marked as 'unused' earlier can now be removed, leaving the five machines that that still had someone using them in 2022, or that are supported in qemu. Cc: Aaro Koskinen Cc: Janusz Krzysztofik Cc: Tony Lindgren Cc: linux-omap@vger.kernel.org Signed-off-by: Arnd Bergmann --- include/linux/platform_data/leds-omap.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/platform_data/leds-omap.h (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-omap.h b/include/linux/platform_data/leds-omap.h deleted file mode 100644 index dd1a3ec86fe4..000000000000 --- a/include/linux/platform_data/leds-omap.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2006 Samsung Electronics - * Kyungmin Park - */ -#ifndef ASMARM_ARCH_LED_H -#define ASMARM_ARCH_LED_H - -struct omap_led_config { - struct led_classdev cdev; - s16 gpio; -}; - -struct omap_led_platform_data { - s16 nr_leds; - struct omap_led_config *leds; -}; - -#endif -- cgit v1.2.3 From 8825acd7cc8a13af7ae6c2c5e5025af38df6c2e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 16:19:44 +0200 Subject: ARM: omap1: remove dead code After the removal of the unused board files, I went through the omap1 code to look for code that no longer has any callers and remove that. In particular, support for the omap7xx/omap8xx family is now completely unused, so I'm only leaving omap15xx/omap16xx/omap59xx. Cc: Aaro Koskinen Cc: Janusz Krzysztofik Cc: linux-omap@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Acked-by: Tony Lindgren Acked-by: Kevin Hilman Signed-off-by: Arnd Bergmann --- include/linux/soc/ti/omap1-soc.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/omap1-soc.h b/include/linux/soc/ti/omap1-soc.h index 81008d400bb6..a42d9aa68648 100644 --- a/include/linux/soc/ti/omap1-soc.h +++ b/include/linux/soc/ti/omap1-soc.h @@ -20,22 +20,6 @@ #undef MULTI_OMAP1 #undef OMAP_NAME -#ifdef CONFIG_ARCH_OMAP730 -# ifdef OMAP_NAME -# undef MULTI_OMAP1 -# define MULTI_OMAP1 -# else -# define OMAP_NAME omap730 -# endif -#endif -#ifdef CONFIG_ARCH_OMAP850 -# ifdef OMAP_NAME -# undef MULTI_OMAP1 -# define MULTI_OMAP1 -# else -# define OMAP_NAME omap850 -# endif -#endif #ifdef CONFIG_ARCH_OMAP15XX # ifdef OMAP_NAME # undef MULTI_OMAP1 @@ -69,7 +53,6 @@ unsigned int omap_rev(void); /* * Macros to group OMAP into cpu classes. * These can be used in most places. - * cpu_is_omap7xx(): True for OMAP730, OMAP850 * cpu_is_omap15xx(): True for OMAP1510, OMAP5910 and OMAP310 * cpu_is_omap16xx(): True for OMAP1610, OMAP5912 and OMAP1710 */ @@ -89,23 +72,13 @@ static inline int is_omap ##subclass (void) \ return (GET_OMAP_SUBCLASS == (id)) ? 1 : 0; \ } -IS_OMAP_CLASS(7xx, 0x07) IS_OMAP_CLASS(15xx, 0x15) IS_OMAP_CLASS(16xx, 0x16) -#define cpu_is_omap7xx() 0 #define cpu_is_omap15xx() 0 #define cpu_is_omap16xx() 0 #if defined(MULTI_OMAP1) -# if defined(CONFIG_ARCH_OMAP730) -# undef cpu_is_omap7xx -# define cpu_is_omap7xx() is_omap7xx() -# endif -# if defined(CONFIG_ARCH_OMAP850) -# undef cpu_is_omap7xx -# define cpu_is_omap7xx() is_omap7xx() -# endif # if defined(CONFIG_ARCH_OMAP15XX) # undef cpu_is_omap15xx # define cpu_is_omap15xx() is_omap15xx() @@ -115,14 +88,6 @@ IS_OMAP_CLASS(16xx, 0x16) # define cpu_is_omap16xx() is_omap16xx() # endif #else -# if defined(CONFIG_ARCH_OMAP730) -# undef cpu_is_omap7xx -# define cpu_is_omap7xx() 1 -# endif -# if defined(CONFIG_ARCH_OMAP850) -# undef cpu_is_omap7xx -# define cpu_is_omap7xx() 1 -# endif # if defined(CONFIG_ARCH_OMAP15XX) # undef cpu_is_omap15xx # define cpu_is_omap15xx() 1 -- cgit v1.2.3 From c3848db316d51dcc0fb10554151b1e7e8ff8c3e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 16:14:18 +0200 Subject: ARM: davinci: drop DAVINCI_DMxxx references Support for all the dm3xx/dm64xx SoCs is no longer available, so drop all other references to those. Acked-by: Stephen Boyd Signed-off-by: Arnd Bergmann --- include/linux/clk/davinci.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/davinci.h b/include/linux/clk/davinci.h index f6ebab6228c2..e1d37451e03f 100644 --- a/include/linux/clk/davinci.h +++ b/include/linux/clk/davinci.h @@ -19,14 +19,5 @@ int da830_pll_init(struct device *dev, void __iomem *base, struct regmap *cfgchi #ifdef CONFIG_ARCH_DAVINCI_DA850 int da850_pll0_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); #endif -#ifdef CONFIG_ARCH_DAVINCI_DM355 -int dm355_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm355_psc_init(struct device *dev, void __iomem *base); -#endif -#ifdef CONFIG_ARCH_DAVINCI_DM365 -int dm365_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm365_pll2_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm365_psc_init(struct device *dev, void __iomem *base); -#endif #endif /* __LINUX_CLK_DAVINCI_PLL_H___ */ -- cgit v1.2.3 From 28c8e088427ad30b4260953f3b6f908972b77c2d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 4 Jan 2023 14:20:54 -0500 Subject: rseq: Increase AT_VECTOR_SIZE_BASE to match rseq auxvec entries Two new auxiliary vector entries are introduced for rseq without matching increment of the AT_VECTOR_SIZE_BASE, which causes failures with CONFIG_HARDENED_USERCOPY=y. Fixes: 317c8194e6ae ("rseq: Introduce feature size and alignment ELF auxiliary vector entries") Reported-by: Nathan Chancellor Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230104192054.34046-1-mathieu.desnoyers@efficios.com --- include/linux/auxvec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index f68d0ec2d740..407f7005e6d6 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -4,6 +4,6 @@ #include -#define AT_VECTOR_SIZE_BASE 20 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 22 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif /* _LINUX_AUXVEC_H */ -- cgit v1.2.3 From ed058eab22d64c00663563e8e1e112989c65c59f Mon Sep 17 00:00:00 2001 From: Henning Schild Date: Thu, 22 Dec 2022 11:37:19 +0100 Subject: platform/x86: simatic-ipc: correct name of a model What we called IPC427G should be renamed to BX-39A to be more in line with the actual product name. Signed-off-by: Henning Schild Link: https://lore.kernel.org/r/20221222103720.8546-2-henning.schild@siemens.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/simatic-ipc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index 632320ec8f08..a4a6cba412cb 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -32,7 +32,7 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC477E = 0x00000A02, SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, - SIMATIC_IPC_IPC427G = 0x00001001, + SIMATIC_IPC_IPCBX_39A = 0x00001001, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) -- cgit v1.2.3 From d348b1d761e358a4ba03fb34aa7e3dbd278db236 Mon Sep 17 00:00:00 2001 From: Henning Schild Date: Thu, 22 Dec 2022 11:37:20 +0100 Subject: platform/x86: simatic-ipc: add another model Add IPC PX-39A support. Signed-off-by: Henning Schild Link: https://lore.kernel.org/r/20221222103720.8546-3-henning.schild@siemens.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/simatic-ipc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index a4a6cba412cb..a48bb5240977 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -33,6 +33,7 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, SIMATIC_IPC_IPCBX_39A = 0x00001001, + SIMATIC_IPC_IPCPX_39A = 0x00001002, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) -- cgit v1.2.3 From 16d73129f1fd8e91eb565482245233809647c649 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 11 Jan 2023 13:57:25 +0800 Subject: platform/chrome: fix kernel-doc warnings for panic notifier Fix the following kernel-doc warnings: $ ./scripts/kernel-doc -none drivers/platform/chrome/* drivers/platform/chrome/cros_ec_debugfs.c:54: warning: Function parameter or member 'notifier_panic' not described in 'cros_ec_debugfs' $ ./scripts/kernel-doc -none include/linux/platform_data/cros_ec_proto.h include/linux/platform_data/cros_ec_proto.h:187: warning: Function parameter or member 'panic_notifier' not described in 'cros_ec_device' Cc: Rob Barnes Fixes: d90fa2c64d59 ("platform/chrome: cros_ec: Poll EC log on EC panic") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230111055728.708990-2-tzungbi@kernel.org --- include/linux/platform_data/cros_ec_proto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 017d502ed66e..a4a3fec15504 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -140,6 +140,7 @@ struct cros_ec_command { * main EC. * @pd: The platform_device used by the mfd driver to interface with the * PD behind an EC. + * @panic_notifier: EC panic notifier. */ struct cros_ec_device { /* These are used by other drivers that want to talk to the EC */ -- cgit v1.2.3 From 20eb556dac27427f951ca13e117d32bd1c041b79 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 11 Jan 2023 13:57:26 +0800 Subject: platform/chrome: fix kernel-doc warning for suspend_timeout_ms Fix the following kernel-doc warning: $ ./scripts/kernel-doc -none include/linux/platform_data/cros_ec_proto.h include/linux/platform_data/cros_ec_proto.h:187: warning: Function parameter or member 'suspend_timeout_ms' not described in 'cros_ec_device' Cc: Evan Green Fixes: e8bf17d58a4d ("platform/chrome: cros_ec: Expose suspend_timeout_ms in debugfs") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230111055728.708990-3-tzungbi@kernel.org --- include/linux/platform_data/cros_ec_proto.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index a4a3fec15504..805dcb19a36d 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -131,6 +131,11 @@ struct cros_ec_command { * @event_data: Raw payload transferred with the MKBP event. * @event_size: Size in bytes of the event data. * @host_event_wake_mask: Mask of host events that cause wake from suspend. + * @suspend_timeout_ms: The timeout in milliseconds between when sleep event + * is received and when the EC will declare sleep + * transition failure if the sleep signal is not + * asserted. See also struct + * ec_params_host_sleep_event_v1 in cros_ec_commands.h. * @last_event_time: exact time from the hard irq when we got notified of * a new event. * @notifier_ready: The notifier_block to let the kernel re-query EC -- cgit v1.2.3 From 212c9b9c395f72fd83c10cf2692b9562c2110d0f Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 11 Jan 2023 13:57:27 +0800 Subject: platform/chrome: fix kernel-doc warning for last_resume_result Fix the following kernel-doc warning: $ ./scripts/kernel-doc -none include/linux/platform_data/cros_ec_proto.h include/linux/platform_data/cros_ec_proto.h:187: warning: Function parameter or member 'last_resume_result' not described in 'cros_ec_device' Cc: Evan Green Fixes: 8c3166e17cf1 ("mfd / platform: cros_ec_debugfs: Expose resume result via debugfs") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230111055728.708990-4-tzungbi@kernel.org --- include/linux/platform_data/cros_ec_proto.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 805dcb19a36d..4865c54d4af1 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -136,6 +136,10 @@ struct cros_ec_command { * transition failure if the sleep signal is not * asserted. See also struct * ec_params_host_sleep_event_v1 in cros_ec_commands.h. + * @last_resume_result: The number of sleep power signal transitions that + * occurred since the suspend message. The high bit + * indicates a timeout occurred. See also struct + * ec_response_host_sleep_event_v1 in cros_ec_commands.h. * @last_event_time: exact time from the hard irq when we got notified of * a new event. * @notifier_ready: The notifier_block to let the kernel re-query EC -- cgit v1.2.3 From 5fa1dd818fb4b30cd2de42696de547e243d946d6 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 11 Jan 2023 13:57:28 +0800 Subject: platform/chrome: fix kernel-doc warnings for cros_ec_command Fix the following kernel-doc warnings: $ ./scripts/kernel-doc -none \ include/linux/platform_data/cros_ec_commands.h include/linux/platform_data/cros_ec_commands.h:1092: warning: expecting prototype for struct ec_response_get_cmd_version. Prototype was for struct ec_response_get_cmd_versions instead include/linux/platform_data/cros_ec_commands.h:5485: warning: This comment starts with '/**', but isn't a kernel-doc comment. include/linux/platform_data/cros_ec_commands.h:5496: warning: This comment starts with '/**', but isn't a kernel-doc comment. Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230111055728.708990-5-tzungbi@kernel.org --- include/linux/platform_data/cros_ec_commands.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 6665e7da6ee2..b9c4a3964247 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1082,7 +1082,7 @@ struct ec_params_get_cmd_versions_v1 { } __ec_align2; /** - * struct ec_response_get_cmd_version - Response to the get command versions. + * struct ec_response_get_cmd_versions - Response to the get command versions. * @version_mask: Mask of supported versions; use EC_VER_MASK() to compare with * a desired version. */ @@ -5480,7 +5480,7 @@ struct ec_response_rollback_info { /* Issue AP reset */ #define EC_CMD_AP_RESET 0x0125 -/** +/* * Get the number of peripheral charge ports */ #define EC_CMD_PCHG_COUNT 0x0134 @@ -5491,7 +5491,7 @@ struct ec_response_pchg_count { uint8_t port_count; } __ec_align1; -/** +/* * Get the status of a peripheral charge port */ #define EC_CMD_PCHG 0x0135 -- cgit v1.2.3 From 961a325becd9a142ae5c8b258e5c2f221f8bfac8 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 Jan 2023 15:41:46 +0800 Subject: platform/chrome: cros_ec: Use per-device lockdep key Lockdep reports a bogus possible deadlock on MT8192 Chromebooks due to the following lock sequences: 1. lock(i2c_register_adapter) [1]; lock(&ec_dev->lock) 2. lock(&ec_dev->lock); lock(prepare_lock); The actual dependency chains are much longer. The shortened version looks somewhat like: 1. cros-ec-rpmsg on mtk-scp ec_dev->lock -> prepare_lock 2. In rt5682_i2c_probe() on native I2C bus: prepare_lock -> regmap->lock -> (possibly) i2c_adapter->bus_lock 3. In rt5682_i2c_probe() on native I2C bus: regmap->lock -> i2c_adapter->bus_lock 4. In sbs_probe() on i2c-cros-ec-tunnel I2C bus attached on cros-ec: i2c_adapter->bus_lock -> ec_dev->lock While lockdep is correct that the shared lockdep classes have a circular dependency, it is bogus because a) 2+3 happen on a native I2C bus b) 4 happens on the actual EC on ChromeOS devices c) 1 happens on the SCP coprocessor on MediaTek Chromebooks that just happens to expose a cros-ec interface, but does not have an i2c-cros-ec-tunnel I2C bus In short, the "dependencies" are actually on different devices. Setup a per-device lockdep key for cros_ec devices so lockdep can tell the two instances apart. This helps with getting rid of the bogus lockdep warning. For ChromeOS devices that only have one cros-ec instance this doesn't change anything. Also add a missing mutex_destroy, just to make the teardown complete. [1] This is likely the per I2C bus lock with shared lockdep class Signed-off-by: Chen-Yu Tsai Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20230111074146.2624496-1-wenst@chromium.org --- include/linux/platform_data/cros_ec_proto.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4865c54d4af1..4f9f756bc17c 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -9,6 +9,7 @@ #define __LINUX_CROS_EC_PROTO_H #include +#include #include #include @@ -122,6 +123,8 @@ struct cros_ec_command { * command. The caller should check msg.result for the EC's result * code. * @pkt_xfer: Send packet to EC and get response. + * @lockdep_key: Lockdep class for each instance. Unused if CONFIG_LOCKDEP is + * not enabled. * @lock: One transaction at a time. * @mkbp_event_supported: 0 if MKBP not supported. Otherwise its value is * the maximum supported version of the MKBP host event @@ -176,6 +179,7 @@ struct cros_ec_device { struct cros_ec_command *msg); int (*pkt_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); + struct lock_class_key lockdep_key; struct mutex lock; u8 mkbp_event_supported; bool host_sleep_v1; -- cgit v1.2.3 From dec5efcffad4f28f5d1fe5dc45b64dffa6abbf04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jan 2023 17:07:38 +0100 Subject: u64_stat: Remove the obsolete fetch_irq() variants. There are no more users of the obsolete interface u64_stats_fetch_begin_irq() and u64_stats_fetch_retry_irq(). Remove the obsolete API. [bigeasy: Split out the bits from a larger patch]. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20230110160738.974085-1-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/u64_stats_sync.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 46040d66334a..ffe48e69b3f3 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -213,16 +213,4 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, return __u64_stats_fetch_retry(syncp, start); } -/* Obsolete interfaces */ -static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) -{ - return u64_stats_fetch_begin(syncp); -} - -static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, - unsigned int start) -{ - return u64_stats_fetch_retry(syncp, start); -} - #endif /* _LINUX_U64_STATS_SYNC_H */ -- cgit v1.2.3 From c2977c61f32e0d54319d158a04cad5fd82c2afcf Mon Sep 17 00:00:00 2001 From: Arun Ramadoss Date: Tue, 10 Jan 2023 14:19:20 +0530 Subject: net: dsa: microchip: ptp: add 4 bytes in tail tag when ptp enabled When the PTP is enabled in hardware bit 6 of PTP_MSG_CONF1 register, the transmit frame needs additional 4 bytes before the tail tag. It is needed for all the transmission packets irrespective of PTP packets or not. The 4-byte timestamp field is 0 for frames other than Pdelay_Resp. For the one-step Pdelay_Resp, the switch needs the receive timestamp of the Pdelay_Req message so that it can put the turnaround time in the correction field. Since PTP has to be enabled for both Transmission and reception timestamping, driver needs to track of the tx and rx setting of the all the user ports in the switch. Two flags hw_tx_en and hw_rx_en are added in ksz_port to track the timestampping setting of each port. When any one of ports has tx or rx timestampping enabled, bit 6 of PTP_MSG_CONF1 is set and it is indicated to tag_ksz.c through tagger bytes. This flag adds 4 additional bytes to the tail tag. When tx and rx timestamping of all the ports are disabled, then 4 bytes are not added. Tested using hwstamp -i Signed-off-by: Arun Ramadoss Reviewed-by: Vladimir Oltean # mostly api Signed-off-by: David S. Miller --- include/linux/dsa/ksz_common.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/dsa/ksz_common.h (limited to 'include/linux') diff --git a/include/linux/dsa/ksz_common.h b/include/linux/dsa/ksz_common.h new file mode 100644 index 000000000000..d2a54161be97 --- /dev/null +++ b/include/linux/dsa/ksz_common.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Microchip switch tag common header + * + * Copyright (C) 2022 Microchip Technology Inc. + */ + +#ifndef _NET_DSA_KSZ_COMMON_H_ +#define _NET_DSA_KSZ_COMMON_H_ + +#include + +struct ksz_tagger_data { + void (*hwtstamp_set_state)(struct dsa_switch *ds, bool on); +}; + +static inline struct ksz_tagger_data * +ksz_tagger_data(struct dsa_switch *ds) +{ + return ds->tagger_data; +} + +#endif /* _NET_DSA_KSZ_COMMON_H_ */ -- cgit v1.2.3 From 2955762b372b791a8f4cd862cf0b5fef9c456449 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Tue, 10 Jan 2023 14:19:23 +0530 Subject: net: ptp: add helper for one-step P2P clocks For P2P delay measurement, the ingress time stamp of the PDelay_Req is required for the correction field of the PDelay_Resp. The application echoes back the correction field of the PDelay_Req when sending the PDelay_Resp. Some hardware (like the ZHAW InES PTP time stamping IP core) subtracts the ingress timestamp autonomously from the correction field, so that the hardware only needs to add the egress timestamp on tx. Other hardware (like the Microchip KSZ9563) reports the ingress time stamp via an interrupt and requires that the software provides this time stamp via tail-tag on tx. In order to avoid introducing a further application interface for this, the driver can simply emulate the behavior of the InES device and subtract the ingress time stamp in software from the correction field. On egress, the correction field can either be kept as it is (and the time stamp field in the tail-tag is set to zero) or move the value from the correction field back to the tail-tag. Changing the correction field requires updating the UDP checksum (if UDP is used as transport). Signed-off-by: Christian Eggers Co-developed-by: Arun Ramadoss Signed-off-by: Arun Ramadoss Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 73 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 2b6ea36ad162..1b5a953c6bbc 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -10,8 +10,12 @@ #ifndef _PTP_CLASSIFY_H_ #define _PTP_CLASSIFY_H_ +#include #include +#include #include +#include +#include #define PTP_CLASS_NONE 0x00 /* not a PTP event message */ #define PTP_CLASS_V1 0x01 /* protocol version 1 */ @@ -129,6 +133,69 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, return msgtype; } +/** + * ptp_check_diff8 - Computes new checksum (when altering a 64-bit field) + * @old: old field value + * @new: new field value + * @oldsum: previous checksum + * + * This function can be used to calculate a new checksum when only a single + * field is changed. Similar as ip_vs_check_diff*() in ip_vs.h. + * + * Return: Updated checksum + */ +static inline __wsum ptp_check_diff8(__be64 old, __be64 new, __wsum oldsum) +{ + __be64 diff[2] = { ~old, new }; + + return csum_partial(diff, sizeof(diff), oldsum); +} + +/** + * ptp_header_update_correction - Update PTP header's correction field + * @skb: packet buffer + * @type: type of the packet (see ptp_classify_raw()) + * @hdr: ptp header + * @correction: new correction value + * + * This updates the correction field of a PTP header and updates the UDP + * checksum (if UDP is used as transport). It is needed for hardware capable of + * one-step P2P that does not already modify the correction field of Pdelay_Req + * event messages on ingress. + */ +static inline +void ptp_header_update_correction(struct sk_buff *skb, unsigned int type, + struct ptp_header *hdr, s64 correction) +{ + __be64 correction_old; + struct udphdr *uhdr; + + /* previous correction value is required for checksum update. */ + memcpy(&correction_old, &hdr->correction, sizeof(correction_old)); + + /* write new correction value */ + put_unaligned_be64((u64)correction, &hdr->correction); + + switch (type & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + case PTP_CLASS_IPV6: + /* locate udp header */ + uhdr = (struct udphdr *)((char *)hdr - sizeof(struct udphdr)); + break; + default: + return; + } + + /* update checksum */ + uhdr->check = csum_fold(ptp_check_diff8(correction_old, + hdr->correction, + ~csum_unfold(uhdr->check))); + if (!uhdr->check) + uhdr->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_NONE; +} + /** * ptp_msg_is_sync - Evaluates whether the given skb is a PTP Sync message * @skb: packet buffer @@ -166,5 +233,11 @@ static inline bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type) { return false; } + +static inline +void ptp_header_update_correction(struct sk_buff *skb, unsigned int type, + struct ptp_header *hdr, s64 correction) +{ +} #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From 90188fff655d8efad79acdf60c3012ffcbbef716 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Tue, 10 Jan 2023 14:19:24 +0530 Subject: net: dsa: microchip: ptp: add packet reception timestamping Rx Timestamping is done through 4 additional bytes in tail tag. Whenever the ptp packet is received, the 4 byte hardware time stamped value is added before 1 byte tail tag. Also, bit 7 in tail tag indicates it as PTP frame. This 4 byte value is extracted from the tail tag and reconstructed to absolute time and assigned to skb hwtstamp. If the packet received in PDelay_Resp, then partial ingress timestamp is subtracted from the correction field. Since user space tools expects to be done in hardware. Signed-off-by: Christian Eggers Co-developed-by: Arun Ramadoss Signed-off-by: Arun Ramadoss Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/ksz_common.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/ksz_common.h b/include/linux/dsa/ksz_common.h index d2a54161be97..a256b08d837d 100644 --- a/include/linux/dsa/ksz_common.h +++ b/include/linux/dsa/ksz_common.h @@ -9,10 +9,31 @@ #include +/* All time stamps from the KSZ consist of 2 bits for seconds and 30 bits for + * nanoseconds. This is NOT the same as 32 bits for nanoseconds. + */ +#define KSZ_TSTAMP_SEC_MASK GENMASK(31, 30) +#define KSZ_TSTAMP_NSEC_MASK GENMASK(29, 0) + +static inline ktime_t ksz_decode_tstamp(u32 tstamp) +{ + u64 ns = FIELD_GET(KSZ_TSTAMP_SEC_MASK, tstamp) * NSEC_PER_SEC + + FIELD_GET(KSZ_TSTAMP_NSEC_MASK, tstamp); + + return ns_to_ktime(ns); +} + struct ksz_tagger_data { void (*hwtstamp_set_state)(struct dsa_switch *ds, bool on); }; +struct ksz_skb_cb { + u32 tstamp; +}; + +#define KSZ_SKB_CB(skb) \ + ((struct ksz_skb_cb *)((skb)->cb)) + static inline struct ksz_tagger_data * ksz_tagger_data(struct dsa_switch *ds) { -- cgit v1.2.3 From ab32f56a4100f879c5064b45c7657bb4be175ac3 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Tue, 10 Jan 2023 14:19:25 +0530 Subject: net: dsa: microchip: ptp: add packet transmission timestamping This patch adds the routines for transmission of ptp packets. When the ptp pdelay_req packet to be transmitted, it uses the deferred xmit worker to schedule the packets. During irq_setup, interrupt for Sync, Pdelay_req and Pdelay_rsp are enabled. So interrupt is triggered for all three packets. But for p2p1step, we require only time stamp of Pdelay_req packet. Hence to avoid posting of the completion from ISR routine for Sync and Pdelay_resp packets, ts_en flag is introduced. This controls which packets need to processed for timestamp. After the packet is transmitted, ISR is triggered. The time at which packet transmitted is recorded to separate register. This value is reconstructed to absolute time and posted to the user application through socket error queue. Signed-off-by: Christian Eggers Co-developed-by: Arun Ramadoss Signed-off-by: Arun Ramadoss Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/ksz_common.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/ksz_common.h b/include/linux/dsa/ksz_common.h index a256b08d837d..b91beab5e138 100644 --- a/include/linux/dsa/ksz_common.h +++ b/include/linux/dsa/ksz_common.h @@ -23,11 +23,19 @@ static inline ktime_t ksz_decode_tstamp(u32 tstamp) return ns_to_ktime(ns); } +struct ksz_deferred_xmit_work { + struct dsa_port *dp; + struct sk_buff *skb; + struct kthread_work work; +}; + struct ksz_tagger_data { + void (*xmit_work_fn)(struct kthread_work *work); void (*hwtstamp_set_state)(struct dsa_switch *ds, bool on); }; struct ksz_skb_cb { + struct sk_buff *clone; u32 tstamp; }; -- cgit v1.2.3 From a32190b154bde4a3bf2fddb9367aec49be09b15d Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Tue, 10 Jan 2023 14:19:26 +0530 Subject: net: dsa: microchip: ptp: move pdelay_rsp correction field to tail tag For PDelay_Resp messages we will likely have a negative value in the correction field. The switch hardware cannot correctly update such values (produces an off by one error in the UDP checksum), so it must be moved to the time stamp field in the tail tag. Format of the correction field is 48 bit ns + 16 bit fractional ns. After updating the correction field, clone is no longer required hence it is freed. Signed-off-by: Christian Eggers Co-developed-by: Arun Ramadoss Signed-off-by: Arun Ramadoss Signed-off-by: David S. Miller --- include/linux/dsa/ksz_common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/ksz_common.h b/include/linux/dsa/ksz_common.h index b91beab5e138..576a99ca698d 100644 --- a/include/linux/dsa/ksz_common.h +++ b/include/linux/dsa/ksz_common.h @@ -36,6 +36,8 @@ struct ksz_tagger_data { struct ksz_skb_cb { struct sk_buff *clone; + unsigned int ptp_type; + bool update_correction; u32 tstamp; }; -- cgit v1.2.3 From 31de2842399ae68c4b2887ffeaedebb7934f343e Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 11 Jan 2023 14:05:18 +0100 Subject: ethtool: add tx aggregation parameters Add the following ethtool tx aggregation parameters: ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES Maximum size in bytes of a tx aggregated block of frames. ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES Maximum number of frames that can be aggregated into a block. ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS Time in usecs after the first packet arrival in an aggregated block for the block to be sent. Signed-off-by: Daniele Palmas Signed-off-by: David S. Miller --- include/linux/ethtool.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d0da303f6634..20d197693d34 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -217,6 +217,9 @@ __ethtool_get_link_ksettings(struct net_device *dev, struct kernel_ethtool_coalesce { u8 use_cqe_mode_tx; u8 use_cqe_mode_rx; + u32 tx_aggr_max_bytes; + u32 tx_aggr_max_frames; + u32 tx_aggr_time_usecs; }; /** @@ -260,7 +263,10 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) #define ETHTOOL_COALESCE_USE_CQE_RX BIT(22) #define ETHTOOL_COALESCE_USE_CQE_TX BIT(23) -#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(23, 0) +#define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES BIT(24) +#define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES BIT(25) +#define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS BIT(26) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(26, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) @@ -288,6 +294,10 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL) #define ETHTOOL_COALESCE_USE_CQE \ (ETHTOOL_COALESCE_USE_CQE_RX | ETHTOOL_COALESCE_USE_CQE_TX) +#define ETHTOOL_COALESCE_TX_AGGR \ + (ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES | \ + ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES | \ + ETHTOOL_COALESCE_TX_AGGR_TIME_USECS) #define ETHTOOL_STAT_NOT_SET (~0ULL) -- cgit v1.2.3 From 0c5ffc3d7b15978c6b184938cd6b8af06e436424 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:26 +0100 Subject: cpuidle, dt: Push RCU-idle into driver Doing RCU-idle outside the driver, only to then temporarily enable it again before going idle is suboptimal. Notably: this converts all dt_init_idle_driver() and __CPU_PM_CPU_IDLE_ENTER() users for they are inextrably intertwined. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195540.068981667@infradead.org --- include/linux/cpuidle.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index fce476275e16..0ddc11e44302 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -289,7 +289,9 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); if (!is_retention) \ __ret = cpu_pm_enter(); \ if (!__ret) { \ + ct_idle_enter(); \ __ret = low_level_idle_enter(state); \ + ct_idle_exit(); \ if (!is_retention) \ cpu_pm_exit(); \ } \ -- cgit v1.2.3 From a01353cf1896ea5b8a7bbc5e2b2d38feed8b7aaa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:27 +0100 Subject: cpuidle: Fix ct_idle_*() usage The whole disable-RCU, enable-IRQS dance is very intricate since changing IRQ state is traced, which depends on RCU. Add two helpers for the cpuidle case that mirror the entry code: ct_cpuidle_enter() ct_cpuidle_exit() And fix all the cases where the enter/exit dance was buggy. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195540.130014793@infradead.org --- include/linux/clockchips.h | 4 ++-- include/linux/cpuidle.h | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 8ae9a95ebf5b..9aac31d856f3 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -211,7 +211,7 @@ extern int tick_receive_broadcast(void); extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); # else -static inline int tick_check_broadcast_expired(void) { return 0; } +static __always_inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } # endif @@ -219,7 +219,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { } static inline void clockevents_suspend(void) { } static inline void clockevents_resume(void) { } -static inline int tick_check_broadcast_expired(void) { return 0; } +static __always_inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 0ddc11e44302..630c879143c7 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -14,6 +14,7 @@ #include #include #include +#include #define CPUIDLE_STATE_MAX 10 #define CPUIDLE_NAME_LEN 16 @@ -115,6 +116,35 @@ struct cpuidle_device { DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev); +static __always_inline void ct_cpuidle_enter(void) +{ + lockdep_assert_irqs_disabled(); + /* + * Idle is allowed to (temporary) enable IRQs. It + * will return with IRQs disabled. + * + * Trace IRQs enable here, then switch off RCU, and have + * arch_cpu_idle() use raw_local_irq_enable(). Note that + * ct_idle_enter() relies on lockdep IRQ state, so switch that + * last -- this is very similar to the entry code. + */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + ct_idle_enter(); + lockdep_hardirqs_on(_RET_IP_); +} + +static __always_inline void ct_cpuidle_exit(void) +{ + /* + * Carefully undo the above. + */ + lockdep_hardirqs_off(_RET_IP_); + ct_idle_exit(); + instrumentation_begin(); +} + /**************************** * CPUIDLE DRIVER INTERFACE * ****************************/ @@ -289,9 +319,9 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); if (!is_retention) \ __ret = cpu_pm_enter(); \ if (!__ret) { \ - ct_idle_enter(); \ + ct_cpuidle_enter(); \ __ret = low_level_idle_enter(state); \ - ct_idle_exit(); \ + ct_cpuidle_exit(); \ if (!is_retention) \ cpu_pm_exit(); \ } \ -- cgit v1.2.3 From 2b5a0e425e6e319b1978db1e9564f6af4228a567 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:31 +0100 Subject: objtool/idle: Validate __cpuidle code as noinstr Idle code is very like entry code in that RCU isn't available. As such, add a little validation. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Geert Uytterhoeven Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195540.373461409@infradead.org --- include/linux/compiler_types.h | 8 ++++++-- include/linux/cpu.h | 3 --- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 7c1afe0f4129..d7858901f035 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -232,11 +232,15 @@ struct ftrace_likely_data { #endif /* Section for code which can't be instrumented at all */ -#define noinstr \ - noinline notrace __attribute((__section__(".noinstr.text"))) \ +#define __noinstr_section(section) \ + noinline notrace __attribute((__section__(section))) \ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ __no_sanitize_memory +#define noinstr __noinstr_section(".noinstr.text") + +#define __cpuidle __noinstr_section(".cpuidle.text") + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 314802f98b9d..f83e4519c5f0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -176,9 +176,6 @@ void __noreturn cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); -/* Attach to any functions which should be considered cpuidle. */ -#define __cpuidle __section(".cpuidle.text") - bool cpu_in_idle(unsigned long pc); void arch_cpu_idle(void); -- cgit v1.2.3 From e4df1511e1f4bcaa0d590aa7bbffe8bbbd6dfb49 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:41 +0100 Subject: cpuidle, sched: Remove instrumentation from TIF_{POLLING_NRFLAG,NEED_RESCHED} objtool pointed out that various idle-TIF management methods have instrumentation: vmlinux.o: warning: objtool: mwait_idle+0x5: call to current_set_polling_and_test() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0xc5: call to current_set_polling_and_test() leaves .noinstr.text section vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x73: call to test_ti_thread_flag() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle+0xbc: call to current_set_polling_and_test() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_irq+0xea: call to current_set_polling_and_test() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_s2idle+0xb4: call to current_set_polling_and_test() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle+0xa6: call to current_clr_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_irq+0xbf: call to current_clr_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_s2idle+0xa1: call to current_clr_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: mwait_idle+0xe: call to __current_set_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0xc5: call to __current_set_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x73: call to test_ti_thread_flag() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle+0xbc: call to __current_set_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_irq+0xea: call to __current_set_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_s2idle+0xb4: call to __current_set_polling() leaves .noinstr.text section vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x73: call to test_ti_thread_flag() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_s2idle+0x73: call to test_ti_thread_flag.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle_irq+0x91: call to test_ti_thread_flag.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: intel_idle+0x78: call to test_ti_thread_flag.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_safe_halt+0xf: call to test_ti_thread_flag.constprop.0() leaves .noinstr.text section Remove the instrumentation, because these methods are used in low-level cpuidle code moving between states, that should not be instrumented. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195540.988741683@infradead.org --- include/linux/sched/idle.h | 40 ++++++++++++++++++++++++++++++---------- include/linux/thread_info.h | 18 +++++++++++++++++- 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index d73d314d59c6..478084f9105e 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -23,12 +23,37 @@ static inline void wake_up_if_idle(int cpu) { } */ #ifdef TIF_POLLING_NRFLAG -static inline void __current_set_polling(void) +#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H + +static __always_inline void __current_set_polling(void) { - set_thread_flag(TIF_POLLING_NRFLAG); + arch_set_bit(TIF_POLLING_NRFLAG, + (unsigned long *)(¤t_thread_info()->flags)); } -static inline bool __must_check current_set_polling_and_test(void) +static __always_inline void __current_clr_polling(void) +{ + arch_clear_bit(TIF_POLLING_NRFLAG, + (unsigned long *)(¤t_thread_info()->flags)); +} + +#else + +static __always_inline void __current_set_polling(void) +{ + set_bit(TIF_POLLING_NRFLAG, + (unsigned long *)(¤t_thread_info()->flags)); +} + +static __always_inline void __current_clr_polling(void) +{ + clear_bit(TIF_POLLING_NRFLAG, + (unsigned long *)(¤t_thread_info()->flags)); +} + +#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H */ + +static __always_inline bool __must_check current_set_polling_and_test(void) { __current_set_polling(); @@ -41,12 +66,7 @@ static inline bool __must_check current_set_polling_and_test(void) return unlikely(tif_need_resched()); } -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) +static __always_inline bool __must_check current_clr_polling_and_test(void) { __current_clr_polling(); @@ -73,7 +93,7 @@ static inline bool __must_check current_clr_polling_and_test(void) } #endif -static inline void current_clr_polling(void) +static __always_inline void current_clr_polling(void) { __current_clr_polling(); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9f392ec76f2b..c02646884fa8 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -177,7 +177,23 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti clear_ti_thread_flag(task_thread_info(t), TIF_##fl) #endif /* !CONFIG_GENERIC_ENTRY */ -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H + +static __always_inline bool tif_need_resched(void) +{ + return arch_test_bit(TIF_NEED_RESCHED, + (unsigned long *)(¤t_thread_info()->flags)); +} + +#else + +static __always_inline bool tif_need_resched(void) +{ + return test_bit(TIF_NEED_RESCHED, + (unsigned long *)(¤t_thread_info()->flags)); +} + +#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, -- cgit v1.2.3 From 6a123d6ae6ea930b9bb3c595ceac2b2f93039f67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:46 +0100 Subject: cpuidle, ACPI: Make noinstr clean objtool found cases where ACPI methods called out into instrumentation code: vmlinux.o: warning: objtool: io_idle+0xc: call to __inb.isra.0() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_idle_enter+0xfe: call to num_online_cpus() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_idle_enter+0x115: call to acpi_idle_fallback_to_c1.isra.0() leaves .noinstr.text section Fix this by: marking the IO in/out, acpi_idle_fallback_to_c1() and num_online_cpus() methods as __always_inline. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195541.294846301@infradead.org --- include/linux/cpumask.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c2aa0aa26b45..d45e5de13721 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1017,9 +1017,9 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. */ -static inline unsigned int num_online_cpus(void) +static __always_inline unsigned int num_online_cpus(void) { - return atomic_read(&__num_online_cpus); + return arch_atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) -- cgit v1.2.3 From 408b961146be4c1a776ce285c3c289afab15298a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:48 +0100 Subject: tracing: WARN on rcuidle ARCH_WANTS_NO_INSTR (a superset of CONFIG_GENERIC_ENTRY) disallows any and all tracing when RCU isn't enabled. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195541.416110581@infradead.org --- include/linux/tracepoint.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4b33b95eb8be..552f80b8362f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -177,6 +177,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ +/* + * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle + * code that disallow any/all tracing/instrumentation when RCU isn't watching. + */ +#ifdef CONFIG_ARCH_WANTS_NO_INSTR +#define RCUIDLE_COND(rcuidle) (rcuidle) +#else +/* srcu can't be used from NMI */ +#define RCUIDLE_COND(rcuidle) (rcuidle && in_nmi()) +#endif + /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. @@ -188,8 +199,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - /* srcu can't be used from NMI */ \ - WARN_ON_ONCE(rcuidle && in_nmi()); \ + if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle))) \ + return; \ \ /* keep srcu and sched-rcu usage consistent */ \ preempt_disable_notrace(); \ -- cgit v1.2.3 From f176d4ccb30747231831f66779697afa6d738b3c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:43:59 +0100 Subject: sched/core: Always inline __this_cpu_preempt_check() Quite a few unnecessary instrumentation calls are generated via the no-op __this_cpu_preempt_check() call, if it gets uninlined by the compiler: vmlinux.o: warning: objtool: in_entry_stack+0x9: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: default_do_nmi+0x10: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: fpu_idle_fpregs+0x41: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: kvm_read_and_reset_apf_flags+0x1: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: lockdep_hardirqs_on+0xb0: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: lockdep_hardirqs_off+0xae: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_nmi_enter+0x69: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_nmi_exit+0x32: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0x9: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_idle_enter+0x43: call to __this_cpu_preempt_check() leaves .noinstr.text section vmlinux.o: warning: objtool: acpi_idle_enter_s2idle+0x45: call to __this_cpu_preempt_check() leaves .noinstr.text section Mark it __always_inline. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Tony Lindgren Tested-by: Ulf Hansson Acked-by: Rafael J. Wysocki Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230112195542.089981974@infradead.org --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index af1071535de8..e60727be79c4 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -310,7 +310,7 @@ extern void __bad_size_call_parameter(void); #ifdef CONFIG_DEBUG_PREEMPT extern void __this_cpu_preempt_check(const char *op); #else -static inline void __this_cpu_preempt_check(const char *op) { } +static __always_inline void __this_cpu_preempt_check(const char *op) { } #endif #define __pcpu_size_call_return(stem, variable) \ -- cgit v1.2.3 From 0e985e9d22864e29d5d2b3d909ad15134d7f6d46 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Jan 2023 20:44:04 +0100 Subject: cpuidle: Add comments about noinstr/__cpuidle usage Add a few words on noinstr / __cpuidle usage. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230112195542.397238052@infradead.org --- include/linux/compiler_types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d7858901f035..dea5bf5bd09c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -239,6 +239,16 @@ struct ftrace_likely_data { #define noinstr __noinstr_section(".noinstr.text") +/* + * The __cpuidle section is used twofold: + * + * 1) the original use -- identifying if a CPU is 'stuck' in idle state based + * on it's instruction pointer. See cpu_in_idle(). + * + * 2) supressing instrumentation around where cpuidle disables RCU; where the + * function isn't strictly required for #1, this is interchangeable with + * noinstr. + */ #define __cpuidle __noinstr_section(".cpuidle.text") #endif /* __KERNEL__ */ -- cgit v1.2.3 From 5b6373de4351debd9fa87f1c46442b2c28d8b18e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 19 Dec 2022 17:05:16 +0100 Subject: drm/fbdev: Remove aperture handling and FBINFO_MISC_FIRMWARE There are no users left of struct fb_info.apertures and the flag FBINFO_MISC_FIRMWARE. Remove both and the aperture-ownership code in the fbdev core. All code for aperture ownership is now located in the fbdev drivers. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221219160516.23436-19-tzimmermann@suse.de --- include/linux/fb.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 96b96323e9cb..30183fd259ae 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -423,8 +423,6 @@ struct fb_tile_ops { */ #define FBINFO_MISC_ALWAYS_SETPAR 0x40000 -/* where the fb is a firmware driver, and can be replaced with a proper one */ -#define FBINFO_MISC_FIRMWARE 0x80000 /* * Host and GPU endianness differ. */ @@ -499,30 +497,10 @@ struct fb_info { void *fbcon_par; /* fbcon use-only private area */ /* From here on everything is device dependent */ void *par; - /* we need the PCI or similar aperture base/size not - smem_start/size as smem_start may just be an object - allocated inside the aperture so may not actually overlap */ - struct apertures_struct { - unsigned int count; - struct aperture { - resource_size_t base; - resource_size_t size; - } ranges[0]; - } *apertures; bool skip_vt_switch; /* no VT switch on suspend/resume required */ }; -static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { - struct apertures_struct *a; - - a = kzalloc(struct_size(a, ranges, max_num), GFP_KERNEL); - if (!a) - return NULL; - a->count = max_num; - return a; -} - #define FBINFO_FLAG_DEFAULT FBINFO_DEFAULT /* This will go away -- cgit v1.2.3 From 6caeb33fa986151f745fc62190bc28a593b8a0d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:05 +0800 Subject: iommu: Add set_platform_dma_ops iommu ops When VFIO finishes assigning a device to user space and calls iommu_group_release_dma_owner() to return the device to kernel, the IOMMU core will attach the default domain to the device. Unfortunately, some IOMMU drivers don't support default domain, hence in the end, the core calls .detach_dev instead. This adds set_platform_dma_ops iommu ops to make it clear that what it does is returning control back to the platform DMA ops. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..7b3e3775b069 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -228,6 +228,9 @@ struct iommu_iotlb_gather { * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain + * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op + * is to support old IOMMU drivers, new drivers should use + * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -256,6 +259,7 @@ struct iommu_ops { struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); + void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ -- cgit v1.2.3 From dd8a25c557e109f868430bd2e3e8f394cb40eaa7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jan 2023 10:54:07 +0800 Subject: iommu: Remove deferred attach check from __iommu_detach_device() At the current moment, __iommu_detach_device() is only called via call chains that are after the device driver is attached - eg via explicit attach APIs called by the device driver. Commit bd421264ed30 ("iommu: Fix deferred domain attachment") has removed deferred domain attachment check from __iommu_attach_device() path, so it should just unconditionally work in the __iommu_detach_device() path. It actually looks like a bug that we were blocking detach on these paths since the attach was unconditional and the caller is going to free the (probably) UNAMANGED domain once this returns. The only place we should be testing for deferred attach is during the initial point the dma device is linked to the group, and then again during the dma api calls. Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7b3e3775b069..0d10566b3cb2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -405,6 +405,7 @@ struct iommu_fault_param { * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume + * @attach_deferred: the dma domain attachment is deferred * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -417,6 +418,7 @@ struct dev_iommu { struct iommu_device *iommu_dev; void *priv; u32 max_pasids; + u32 attach_deferred:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3 From 8f9930fa016134ea07db4775ec596b16c3d03f05 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:08 +0800 Subject: iommu: Remove detach_dev callback The detach_dev callback of domain ops is not called in the IOMMU core. Remove this callback to avoid dead code. The trace event for detaching domain from device is removed accordingly. Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0d10566b3cb2..a8063f26ff69 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -299,7 +299,6 @@ struct iommu_ops { * * EBUSY - device is attached to a domain and cannot be changed * * ENODEV - device specific errors, not able to be attached * * - treated as ENODEV by the caller. Use is discouraged - * @detach_dev: detach an iommu domain from a device * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to @@ -320,7 +319,6 @@ struct iommu_ops { */ struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -- cgit v1.2.3 From d3f450533bbcb6dd4d7d59cadc9b61b7321e4ac1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 10:44:31 +0100 Subject: efi: tpm: Avoid READ_ONCE() for accessing the event log Nathan reports that recent kernels built with LTO will crash when doing EFI boot using Fedora's GRUB and SHIM. The culprit turns out to be a misaligned load from the TPM event log, which is annotated with READ_ONCE(), and under LTO, this gets translated into a LDAR instruction which does not tolerate misaligned accesses. Interestingly, this does not happen when booting the same kernel straight from the UEFI shell, and so the fact that the event log may appear misaligned in memory may be caused by a bug in GRUB or SHIM. However, using READ_ONCE() to access firmware tables is slightly unusual in any case, and here, we only need to ensure that 'event' is not dereferenced again after it gets unmapped, but this is already taken care of by the implicit barrier() semantics of the early_memunmap() call. Cc: Cc: Peter Jones Cc: Jarkko Sakkinen Cc: Matthew Garrett Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1782 Signed-off-by: Ard Biesheuvel --- include/linux/tpm_eventlog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 20c0ff54b7a0..7d68a5cc5881 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev * The loop below will unmap these fields if the log is larger than * one page, so save them here for reference: */ - count = READ_ONCE(event->count); - event_type = READ_ONCE(event->event_type); + count = event->count; + event_type = event->event_type; /* Verify that it's the log header */ if (event_header->pcr_idx != 0 || -- cgit v1.2.3 From 9f820fc0651c32f8dde26b8e3ad93e1b9fdb62c4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 12 Jan 2023 10:36:35 +0100 Subject: mtd: rawnand: Check the data only read pattern only once Instead of checking if a pattern is supported each time we need it, let's create a bitfield that only the core would be allowed to fill at startup time. The core and the individual drivers may then use it in order to check what operation they should use. This bitfield is supposed to grow over time. Signed-off-by: Miquel Raynal Tested-by: Liao Jaime Link: https://lore.kernel.org/linux-mtd/20230112093637.987838-2-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index dcf90144d70b..28c5dce782dd 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1094,10 +1094,18 @@ struct nand_controller_ops { * * @lock: lock used to serialize accesses to the NAND controller * @ops: NAND controller operations. + * @supported_op: NAND controller known-to-be-supported operations, + * only writable by the core after initial checking. + * @supported_op.data_only_read: The controller supports reading more data from + * the bus without restarting an entire read operation nor + * changing the column. */ struct nand_controller { struct mutex lock; const struct nand_controller_ops *ops; + struct { + unsigned int data_only_read: 1; + } supported_op; }; static inline void nand_controller_init(struct nand_controller *nfc) -- cgit v1.2.3 From 003fe4b9545b83cca4f7a7633695d1f69a0b0011 Mon Sep 17 00:00:00 2001 From: JaimeLiao Date: Thu, 12 Jan 2023 10:36:37 +0100 Subject: mtd: rawnand: Support for sequential cache reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for sequential cache reads for controllers using the generic core helpers for their fast read/write helpers. Sequential reads may reduce the overhead when accessing physically continuous data by loading in cache the next page while the previous page gets sent out on the NAND bus. The ONFI specification provides the following additional commands to handle sequential cached reads: * 0x31 - READ CACHE SEQUENTIAL: Requires the NAND chip to load the next page into cache while keeping the current cache available for host reads. * 0x3F - READ CACHE END: Tells the NAND chip this is the end of the sequential cache read, the current cache shall remain accessible for the host but no more internal cache loading operation is required. On the bus, a multi page read operation is currently handled like this: 00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN 00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN 00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN Sequential cached reads may instead be achieved with: 00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \ 31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \ 31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \ 3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN Below are the read speed test results with regular reads and sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with a NAND chip characterized with the following timings: * tR: 20 µs * tRCBSY: 5 µs * tRR: 20 ns and the following geometry: * device size: 2 MiB * eraseblock size: 128 kiB * page size: 2 kiB ============= Normal read @ 33MHz ================= mtd_speedtest: eraseblock read speed is 15633 KiB/s mtd_speedtest: page read speed is 15515 KiB/s mtd_speedtest: 2 page read speed is 15398 KiB/s =================================================== ========= Sequential cache read @ 33MHz =========== mtd_speedtest: eraseblock read speed is 18285 KiB/s mtd_speedtest: page read speed is 15875 KiB/s mtd_speedtest: 2 page read speed is 16253 KiB/s =================================================== We observe an overall speed improvement of about 5% when reading 2 pages, up to 15% when reading an entire block. This is due to the ~14us gain on each additional page read (tR - (tRCBSY + tRR)). Co-developed-by: Miquel Raynal Signed-off-by: Miquel Raynal Signed-off-by: JaimeLiao Tested-by: Liao Jaime Link: https://lore.kernel.org/linux-mtd/20230112093637.987838-4-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 28c5dce782dd..1b0936fe3c6e 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -67,6 +67,8 @@ struct gpio_desc; /* Extended commands for large page devices */ #define NAND_CMD_READSTART 0x30 +#define NAND_CMD_READCACHESEQ 0x31 +#define NAND_CMD_READCACHEEND 0x3f #define NAND_CMD_RNDOUTSTART 0xE0 #define NAND_CMD_CACHEDPROG 0x15 @@ -1099,12 +1101,14 @@ struct nand_controller_ops { * @supported_op.data_only_read: The controller supports reading more data from * the bus without restarting an entire read operation nor * changing the column. + * @supported_op.cont_read: The controller supports sequential cache reads. */ struct nand_controller { struct mutex lock; const struct nand_controller_ops *ops; struct { unsigned int data_only_read: 1; + unsigned int cont_read: 1; } supported_op; }; @@ -1308,6 +1312,11 @@ struct nand_chip { int read_retries; struct nand_secure_region *secure_regions; u8 nr_secure_regions; + struct { + bool ongoing; + unsigned int first_page; + unsigned int last_page; + } cont_read; /* Externals */ struct nand_controller *controller; -- cgit v1.2.3 From fa5bde139ee43ab91087c01e690c61aec957c339 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 27 Oct 2022 13:07:55 +0900 Subject: ata: libata: Introduce ata_ncq_supported() Introduce the inline helper function ata_ncq_supported() to test if a device supports NCQ commands. The function ata_ncq_enabled() is also rewritten using this new helper function. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Niklas Cassel --- include/linux/libata.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b7f5d9e2f87..059ca7f2b69c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1691,21 +1691,35 @@ extern struct ata_device *ata_dev_next(struct ata_device *dev, (dev) = ata_dev_next((dev), (link), ATA_DITER_##mode)) /** - * ata_ncq_enabled - Test whether NCQ is enabled - * @dev: ATA device to test for + * ata_ncq_supported - Test whether NCQ is supported + * @dev: ATA device to test * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: - * 1 if NCQ is enabled for @dev, 0 otherwise. + * true if @dev supports NCQ, false otherwise. */ -static inline int ata_ncq_enabled(struct ata_device *dev) +static inline bool ata_ncq_supported(struct ata_device *dev) { if (!IS_ENABLED(CONFIG_SATA_HOST)) - return 0; - return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ_OFF | - ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ; + return false; + return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ; +} + +/** + * ata_ncq_enabled - Test whether NCQ is enabled + * @dev: ATA device to test + * + * LOCKING: + * spin_lock_irqsave(host lock) + * + * RETURNS: + * true if NCQ is enabled for @dev, false otherwise. + */ +static inline bool ata_ncq_enabled(struct ata_device *dev) +{ + return ata_ncq_supported(dev) && !(dev->flags & ATA_DFLAG_NCQ_OFF); } static inline bool ata_fpdma_dsm_supported(struct ata_device *dev) -- cgit v1.2.3 From 4d2e4980a5289ae31a1cff40d258b68573182a37 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 14 Oct 2022 18:05:38 +0900 Subject: ata: libata: cleanup fua support detection Move the detection of a device FUA support from ata_scsiop_mode_sense()/ata_dev_supports_fua() to device scan time in ata_dev_configure(). The function ata_dev_config_fua() is introduced to detect if a device supports FUA and this support is indicated using the new device flag ATA_DFLAG_FUA. In order to blacklist known buggy devices, the horkage flag ATA_HORKAGE_NO_FUA is introduced. Similarly to other horkage flags, the libata.force= arguments "fua" and "nofua" are also introduced to allow a user to control this horkage flag through the "force" libata module parameter. The ATA_DFLAG_FUA device flag is set only and only if all the following conditions are met: * libata.fua module parameter is set to 1 * The device supports the WRITE DMA FUA EXT command, * The device is not marked with the ATA_HORKAGE_NO_FUA flag, either from the blacklist or set by the user with libata.force=nofua * The device supports NCQ (while this is not mandated by the standards, this restriction is introduced to avoid problems with older non-NCQ devices). Enabling or diabling libata FUA support for all devices can now also be done using the "force=[no]fua" module parameter when libata.fua is set to 1. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Niklas Cassel --- include/linux/libata.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 059ca7f2b69c..a759dfbdcc91 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -90,6 +90,7 @@ enum { ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ ATA_DFLAG_TRUSTED = (1 << 8), /* device supports trusted send/recv */ + ATA_DFLAG_FUA = (1 << 9), /* device supports FUA */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 11), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 12), /* device supports NCQ priority */ @@ -112,9 +113,9 @@ enum { ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */ - ATA_DFLAG_FEATURES_MASK = ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ - ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ - ATA_DFLAG_NCQ_PRIO, + ATA_DFLAG_FEATURES_MASK = (ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ + ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ + ATA_DFLAG_NCQ_PRIO | ATA_DFLAG_FUA), ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -381,6 +382,7 @@ enum { ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ + ATA_HORKAGE_NO_FUA = (1 << 30), /* Do not use FUA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 80e87442e69ba8589160c5d472c2d973d0731c86 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 12 Jan 2023 16:15:16 +0100 Subject: enetc: Separate C22 and C45 transactions The enetc MDIO bus driver can perform both C22 and C45 transfers. Create separate functions for each and register the C45 versions using the new API calls where appropriate. This driver is shared with the Felix DSA switch, so update that at the same time. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Signed-off-by: Jakub Kicinski --- include/linux/fsl/enetc_mdio.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h index 2d9203314865..df25fffdc0ae 100644 --- a/include/linux/fsl/enetc_mdio.h +++ b/include/linux/fsl/enetc_mdio.h @@ -37,16 +37,27 @@ struct enetc_mdio_priv { #if IS_REACHABLE(CONFIG_FSL_ENETC_MDIO) -int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum); -int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value); +int enetc_mdio_read_c22(struct mii_bus *bus, int phy_id, int regnum); +int enetc_mdio_write_c22(struct mii_bus *bus, int phy_id, int regnum, + u16 value); +int enetc_mdio_read_c45(struct mii_bus *bus, int phy_id, int devad, int regnum); +int enetc_mdio_write_c45(struct mii_bus *bus, int phy_id, int devad, int regnum, + u16 value); struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs); #else -static inline int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) +static inline int enetc_mdio_read_c22(struct mii_bus *bus, int phy_id, + int regnum) { return -EINVAL; } -static inline int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, - u16 value) +static inline int enetc_mdio_write_c22(struct mii_bus *bus, int phy_id, + int regnum, u16 value) +{ return -EINVAL; } +static inline int enetc_mdio_read_c45(struct mii_bus *bus, int phy_id, + int devad, int regnum) +{ return -EINVAL; } +static inline int enetc_mdio_write_c45(struct mii_bus *bus, int phy_id, + int devad, int regnum, u16 value) { return -EINVAL; } struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) { return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 9b2e3723728efe03433be5c13b31da451d88ee3d Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 4 Jan 2023 11:43:35 +0200 Subject: net/mlx5: Introduce CQE error syndrome Introduces CQE error syndrome bits which are inside qp_context_extension and are used to report the reason the QP was moved to error state. Useful for cases in which a CQE isn't generated, such as remote write rkey violation. Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/f8359315f8130f6d2abe4b94409ac7802f54bce3.1672821186.git.leonro@nvidia.com Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 47 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a9ee7bc59c90..2d17b6a6d82d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1496,7 +1496,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 null_mkey[0x1]; u8 log_max_klm_list_size[0x6]; - u8 reserved_at_120[0xa]; + u8 reserved_at_120[0x2]; + u8 qpc_extension[0x1]; + u8 reserved_at_123[0x7]; u8 log_max_ra_req_dc[0x6]; u8 reserved_at_130[0x2]; u8 eth_wqe_too_small[0x1]; @@ -1662,7 +1664,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_bf_reg_size[0x5]; - u8 reserved_at_270[0x6]; + u8 reserved_at_270[0x3]; + u8 qp_error_syndrome[0x1]; + u8 reserved_at_274[0x2]; u8 lag_dct[0x2]; u8 lag_tx_port_affinity[0x1]; u8 lag_native_fdb_selection[0x1]; @@ -5342,6 +5346,37 @@ struct mlx5_ifc_query_rmp_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_cqe_error_syndrome_bits { + u8 hw_error_syndrome[0x8]; + u8 hw_syndrome_type[0x4]; + u8 reserved_at_c[0x4]; + u8 vendor_error_syndrome[0x8]; + u8 syndrome[0x8]; +}; + +struct mlx5_ifc_qp_context_extension_bits { + u8 reserved_at_0[0x60]; + + struct mlx5_ifc_cqe_error_syndrome_bits error_syndrome; + + u8 reserved_at_80[0x580]; +}; + +struct mlx5_ifc_qpc_extension_and_pas_list_in_bits { + struct mlx5_ifc_qp_context_extension_bits qpc_data_extension; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_qp_pas_list_in_bits { + struct mlx5_ifc_cmd_pas_bits pas[0]; +}; + +union mlx5_ifc_qp_pas_or_qpc_ext_and_pas_bits { + struct mlx5_ifc_qp_pas_list_in_bits qp_pas_list; + struct mlx5_ifc_qpc_extension_and_pas_list_in_bits qpc_ext_and_pas_list; +}; + struct mlx5_ifc_query_qp_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5358,7 +5393,7 @@ struct mlx5_ifc_query_qp_out_bits { u8 reserved_at_800[0x80]; - u8 pas[][0x40]; + union mlx5_ifc_qp_pas_or_qpc_ext_and_pas_bits qp_pas_or_qpc_ext_and_pas; }; struct mlx5_ifc_query_qp_in_bits { @@ -5368,7 +5403,8 @@ struct mlx5_ifc_query_qp_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x8]; + u8 qpc_ext[0x1]; + u8 reserved_at_41[0x7]; u8 qpn[0x18]; u8 reserved_at_60[0x20]; @@ -8571,7 +8607,8 @@ struct mlx5_ifc_create_qp_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x8]; + u8 qpc_ext[0x1]; + u8 reserved_at_41[0x7]; u8 input_qpn[0x18]; u8 reserved_at_60[0x20]; -- cgit v1.2.3 From 312b8f79eb05479628ee71357749815b2eeeeea8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 4 Jan 2023 11:43:34 +0200 Subject: RDMA/mlx: Calling qp event handler in workqueue context Move the call of qp event handler from atomic to workqueue context, so that the handler is able to block. This is needed by following patches. Signed-off-by: Mark Zhang Reviewed-by: Patrisious Haddad Link: https://lore.kernel.org/r/0cd17b8331e445f03942f4bb28d447f24ac5669d.1672821186.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx4/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9db93e487496..c78b90f2e9a1 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -503,4 +503,5 @@ static inline u16 folded_qp(u32 q) u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); +void mlx4_put_qp(struct mlx4_qp *qp); #endif /* MLX4_QP_H */ -- cgit v1.2.3 From 1ff45e6da54fc66ab2f9b4d3b202b29d1a706e01 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Sep 2022 15:16:38 +0200 Subject: ARM: sa1100: remove irda references IRDA support is long gone, so there is no need to declare the platform device data. See-also: d64c2a76123f ("staging: irda: remove the irda network stack and drivers") Signed-off-by: Arnd Bergmann fixup sa1100 irda Signed-off-by: Arnd Bergmann --- include/linux/platform_data/irda-sa11x0.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/linux/platform_data/irda-sa11x0.h (limited to 'include/linux') diff --git a/include/linux/platform_data/irda-sa11x0.h b/include/linux/platform_data/irda-sa11x0.h deleted file mode 100644 index 7db59c917575..000000000000 --- a/include/linux/platform_data/irda-sa11x0.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/asm/mach/irda.h - * - * Copyright (C) 2004 Russell King. - */ -#ifndef __ASM_ARM_MACH_IRDA_H -#define __ASM_ARM_MACH_IRDA_H - -struct irda_platform_data { - int (*startup)(struct device *); - void (*shutdown)(struct device *); - int (*set_power)(struct device *, unsigned int state); - void (*set_speed)(struct device *, unsigned int speed); -}; - -#endif -- cgit v1.2.3 From 028908f2ca6fc046a9932fb2d2f0409f4684ea41 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 09:25:25 +0200 Subject: ARM: mmp: remove custom sram code The MMP_SRAM code is no longer used by the tdma driver because the Kconfig symbol is not selected, so remove it along with its former callsite. Acked-By: Vinod Koul Signed-off-by: Arnd Bergmann --- include/linux/platform_data/dma-mmp_tdma.h | 36 ------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 include/linux/platform_data/dma-mmp_tdma.h (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-mmp_tdma.h b/include/linux/platform_data/dma-mmp_tdma.h deleted file mode 100644 index 8bec5484dc86..000000000000 --- a/include/linux/platform_data/dma-mmp_tdma.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * SRAM Memory Management - * - * Copyright (c) 2011 Marvell Semiconductors Inc. - */ - -#ifndef __DMA_MMP_TDMA_H -#define __DMA_MMP_TDMA_H - -#include - -/* ARBITRARY: SRAM allocations are multiples of this 2^N size */ -#define SRAM_GRANULARITY 512 - -enum sram_type { - MMP_SRAM_UNDEFINED = 0, - MMP_ASRAM, - MMP_ISRAM, -}; - -struct sram_platdata { - char *pool_name; - int granularity; -}; - -#ifdef CONFIG_MMP_SRAM -extern struct gen_pool *sram_get_gpool(char *pool_name); -#else -static inline struct gen_pool *sram_get_gpool(char *pool_name) -{ - return NULL; -} -#endif - -#endif /* __DMA_MMP_TDMA_H */ -- cgit v1.2.3 From 1027b4df8354c55a5ed538e391d4cf9c509b74a7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 12:02:12 +0200 Subject: ARM: mmp: remove old PM support Assuming that we don't actually want the old-style pm-mmp2.c and pm-pxa910.c implementation, all these files can go away as well. Signed-off-by: Arnd Bergmann --- include/linux/soc/mmp/cputype.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mmp/cputype.h b/include/linux/soc/mmp/cputype.h index 221790761e8e..f13d127fadc4 100644 --- a/include/linux/soc/mmp/cputype.h +++ b/include/linux/soc/mmp/cputype.h @@ -26,29 +26,7 @@ extern unsigned int mmp_chip_id; -#ifdef CONFIG_CPU_PXA168 -static inline int cpu_is_pxa168(void) -{ - return (((read_cpuid_id() >> 8) & 0xff) == 0x84) && - ((mmp_chip_id & 0xfff) == 0x168); -} -#else -#define cpu_is_pxa168() (0) -#endif - -/* cpu_is_pxa910() is shared on both pxa910 and pxa920 */ -#ifdef CONFIG_CPU_PXA910 -static inline int cpu_is_pxa910(void) -{ - return (((read_cpuid_id() >> 8) & 0xff) == 0x84) && - (((mmp_chip_id & 0xfff) == 0x910) || - ((mmp_chip_id & 0xfff) == 0x920)); -} -#else -#define cpu_is_pxa910() (0) -#endif - -#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) +#if defined(CONFIG_MACH_MMP2_DT) static inline int cpu_is_mmp2(void) { return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && -- cgit v1.2.3 From 61b7f8920b176e3cb86c3b5e7c866261720a7917 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 15:33:15 +0200 Subject: ARM: s3c: remove all s3c24xx support The platform was deprecated in commit 6a5e69c7ddea ("ARM: s3c: mark as deprecated and schedule removal") and can be removed. This includes all files that are exclusively for s3c24xx and not shared with s3c64xx, as well as the glue logic in Kconfig and the maintainer file entries. Cc: Arnaud Patard Cc: Ben Dooks Cc: Christer Weinigel Cc: Guillaume GOURAT Cc: Heiko Stuebner Cc: Simtec Linux Team Cc: openmoko-kernel@lists.openmoko.org Acked-by: Heiko Stuebner Acked-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/amba/pl093.h | 77 ---------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 include/linux/amba/pl093.h (limited to 'include/linux') diff --git a/include/linux/amba/pl093.h b/include/linux/amba/pl093.h deleted file mode 100644 index b17166e3b49a..000000000000 --- a/include/linux/amba/pl093.h +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* linux/amba/pl093.h - * - * Copyright (c) 2008 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * AMBA PL093 SSMC (synchronous static memory controller) - * See DDI0236.pdf (r0p4) for more details -*/ - -#define SMB_BANK(x) ((x) * 0x20) /* each bank control set is 0x20 apart */ - -/* Offsets for SMBxxxxRy registers */ - -#define SMBIDCYR (0x00) -#define SMBWSTRDR (0x04) -#define SMBWSTWRR (0x08) -#define SMBWSTOENR (0x0C) -#define SMBWSTWENR (0x10) -#define SMBCR (0x14) -#define SMBSR (0x18) -#define SMBWSTBRDR (0x1C) - -/* Masks for SMB registers */ -#define IDCY_MASK (0xf) -#define WSTRD_MASK (0xf) -#define WSTWR_MASK (0xf) -#define WSTOEN_MASK (0xf) -#define WSTWEN_MASK (0xf) - -/* Notes from datasheet: - * WSTOEN <= WSTRD - * WSTWEN <= WSTWR - * - * WSTOEN is not used with nWAIT - */ - -/* SMBCR bit definitions */ -#define SMBCR_BIWRITEEN (1 << 21) -#define SMBCR_ADDRVALIDWRITEEN (1 << 20) -#define SMBCR_SYNCWRITE (1 << 17) -#define SMBCR_BMWRITE (1 << 16) -#define SMBCR_WRAPREAD (1 << 14) -#define SMBCR_BIREADEN (1 << 13) -#define SMBCR_ADDRVALIDREADEN (1 << 12) -#define SMBCR_SYNCREAD (1 << 9) -#define SMBCR_BMREAD (1 << 8) -#define SMBCR_SMBLSPOL (1 << 6) -#define SMBCR_WP (1 << 3) -#define SMBCR_WAITEN (1 << 2) -#define SMBCR_WAITPOL (1 << 1) -#define SMBCR_RBLE (1 << 0) - -#define SMBCR_BURSTLENWRITE_MASK (3 << 18) -#define SMBCR_BURSTLENWRITE_4 (0 << 18) -#define SMBCR_BURSTLENWRITE_8 (1 << 18) -#define SMBCR_BURSTLENWRITE_RESERVED (2 << 18) -#define SMBCR_BURSTLENWRITE_CONTINUOUS (3 << 18) - -#define SMBCR_BURSTLENREAD_MASK (3 << 10) -#define SMBCR_BURSTLENREAD_4 (0 << 10) -#define SMBCR_BURSTLENREAD_8 (1 << 10) -#define SMBCR_BURSTLENREAD_16 (2 << 10) -#define SMBCR_BURSTLENREAD_CONTINUOUS (3 << 10) - -#define SMBCR_MW_MASK (3 << 4) -#define SMBCR_MW_8BIT (0 << 4) -#define SMBCR_MW_16BIT (1 << 4) -#define SMBCR_MW_M32BIT (2 << 4) - -/* SSMC status registers */ -#define SSMCCSR (0x200) -#define SSMCCR (0x204) -#define SSMCITCR (0x208) -#define SSMCITIP (0x20C) -#define SSMCITIOP (0x210) -- cgit v1.2.3 From 1ea35b355722675b3b654475a37898742731d016 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 15:43:53 +0200 Subject: ARM: s3c: remove s3c24xx specific hacks A number of device drivers reference CONFIG_ARM_S3C24XX_CPUFREQ or similar symbols that are no longer available with the platform gone, though the drivers themselves are still used on newer platforms, so remove these hacks. Acked-by: Greg Kroah-Hartman Acked-by: Linus Walleij Acked-by: Jonathan Cameron Acked-by: Daniel Lezcano Acked-by: Miquel Raynal Acked-by: Ulf Hansson Acked-by: Stephen Boyd Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/clk/samsung.h | 32 --------------------- include/linux/soc/samsung/s3c-pm.h | 58 -------------------------------------- 2 files changed, 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/samsung.h b/include/linux/clk/samsung.h index 38b774001712..0cf7aac83439 100644 --- a/include/linux/clk/samsung.h +++ b/include/linux/clk/samsung.h @@ -21,36 +21,4 @@ static inline void s3c64xx_clk_init(struct device_node *np, bool s3c6400, void __iomem *base) { } #endif /* CONFIG_S3C64XX_COMMON_CLK */ -#ifdef CONFIG_S3C2410_COMMON_CLK -void s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, - int current_soc, - void __iomem *reg_base); -#else -static inline void s3c2410_common_clk_init(struct device_node *np, - unsigned long xti_f, - int current_soc, - void __iomem *reg_base) { } -#endif /* CONFIG_S3C2410_COMMON_CLK */ - -#ifdef CONFIG_S3C2412_COMMON_CLK -void s3c2412_common_clk_init(struct device_node *np, unsigned long xti_f, - unsigned long ext_f, void __iomem *reg_base); -#else -static inline void s3c2412_common_clk_init(struct device_node *np, - unsigned long xti_f, - unsigned long ext_f, - void __iomem *reg_base) { } -#endif /* CONFIG_S3C2412_COMMON_CLK */ - -#ifdef CONFIG_S3C2443_COMMON_CLK -void s3c2443_common_clk_init(struct device_node *np, unsigned long xti_f, - int current_soc, - void __iomem *reg_base); -#else -static inline void s3c2443_common_clk_init(struct device_node *np, - unsigned long xti_f, - int current_soc, - void __iomem *reg_base) { } -#endif /* CONFIG_S3C2443_COMMON_CLK */ - #endif /* __LINUX_CLK_SAMSUNG_H_ */ diff --git a/include/linux/soc/samsung/s3c-pm.h b/include/linux/soc/samsung/s3c-pm.h index f9164559c99f..5b23d85d20ab 100644 --- a/include/linux/soc/samsung/s3c-pm.h +++ b/include/linux/soc/samsung/s3c-pm.h @@ -14,58 +14,10 @@ /* PM debug functions */ -/** - * struct pm_uart_save - save block for core UART - * @ulcon: Save value for S3C2410_ULCON - * @ucon: Save value for S3C2410_UCON - * @ufcon: Save value for S3C2410_UFCON - * @umcon: Save value for S3C2410_UMCON - * @ubrdiv: Save value for S3C2410_UBRDIV - * - * Save block for UART registers to be held over sleep and restored if they - * are needed (say by debug). -*/ -struct pm_uart_save { - u32 ulcon; - u32 ucon; - u32 ufcon; - u32 umcon; - u32 ubrdiv; - u32 udivslot; -}; - -#ifdef CONFIG_SAMSUNG_PM_DEBUG -/** - * s3c_pm_dbg() - low level debug function for use in suspend/resume. - * @msg: The message to print. - * - * This function is used mainly to debug the resume process before the system - * can rely on printk/console output. It uses the low-level debugging output - * routine printascii() to do its work. - */ -extern void s3c_pm_dbg(const char *msg, ...); - -#define S3C_PMDBG(fmt...) s3c_pm_dbg(fmt) - -extern void s3c_pm_save_uarts(bool is_s3c24xx); -extern void s3c_pm_restore_uarts(bool is_s3c24xx); - -#ifdef CONFIG_ARCH_S3C64XX -extern void s3c_pm_arch_update_uart(void __iomem *regs, - struct pm_uart_save *save); -#else -static inline void -s3c_pm_arch_update_uart(void __iomem *regs, struct pm_uart_save *save) -{ -} -#endif - -#else #define S3C_PMDBG(fmt...) pr_debug(fmt) static inline void s3c_pm_save_uarts(bool is_s3c24xx) { } static inline void s3c_pm_restore_uarts(bool is_s3c24xx) { } -#endif /* suspend memory checking */ @@ -81,14 +33,4 @@ extern void s3c_pm_check_store(void); #define s3c_pm_check_store() do { } while (0) #endif -/* system device subsystems */ - -extern struct bus_type s3c2410_subsys; -extern struct bus_type s3c2410a_subsys; -extern struct bus_type s3c2412_subsys; -extern struct bus_type s3c2416_subsys; -extern struct bus_type s3c2440_subsys; -extern struct bus_type s3c2442_subsys; -extern struct bus_type s3c2443_subsys; - #endif -- cgit v1.2.3 From cc146410f9a4abae417b476ea1631089c0ddd171 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 17:30:48 +0200 Subject: ARM: s3c: remove adc.c This driver could not be enabled on s3c64xx for a long time because of the ARCH_MULTIPLATFORM dependency, so remove it. Acked-by: Mark Brown Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/soc/samsung/s3c-adc.h | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/linux/soc/samsung/s3c-adc.h (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-adc.h b/include/linux/soc/samsung/s3c-adc.h deleted file mode 100644 index 591c94ef957d..000000000000 --- a/include/linux/soc/samsung/s3c-adc.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2008 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * S3C ADC driver information - */ - -#ifndef __LINUX_SOC_SAMSUNG_S3C_ADC_H -#define __LINUX_SOC_SAMSUNG_S3C_ADC_H __FILE__ - -struct s3c_adc_client; -struct platform_device; - -extern int s3c_adc_start(struct s3c_adc_client *client, - unsigned int channel, unsigned int nr_samples); - -extern int s3c_adc_read(struct s3c_adc_client *client, unsigned int ch); - -extern struct s3c_adc_client * - s3c_adc_register(struct platform_device *pdev, - void (*select)(struct s3c_adc_client *client, - unsigned selected), - void (*conv)(struct s3c_adc_client *client, - unsigned d0, unsigned d1, - unsigned *samples_left), - unsigned int is_ts); - -extern void s3c_adc_release(struct s3c_adc_client *client); - -#endif /* __LINUX_SOC_SAMSUNG_S3C_ADC_H */ -- cgit v1.2.3 From 0d297df03890c385195fcf827432eb44f5b5918d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 16:18:41 +0200 Subject: ARM: s3c: simplify platform code Following down the now unused symbols and header files, some additional content can be dropped that is used by neither the s3c64xx DT support nor the crag6410 board. Acked-by: Mark Brown Signed-off-by: Arnd Bergmann --- include/linux/platform_data/media/s5p_hdmi.h | 32 ---------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/linux/platform_data/media/s5p_hdmi.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/s5p_hdmi.h b/include/linux/platform_data/media/s5p_hdmi.h deleted file mode 100644 index 457321e917b9..000000000000 --- a/include/linux/platform_data/media/s5p_hdmi.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Driver header for S5P HDMI chip. - * - * Copyright (c) 2011 Samsung Electronics, Co. Ltd - * Contact: Tomasz Stanislawski - */ - -#ifndef S5P_HDMI_H -#define S5P_HDMI_H - -struct i2c_board_info; - -/** - * @hdmiphy_bus: controller id for HDMIPHY bus - * @hdmiphy_info: template for HDMIPHY I2C device - * @mhl_bus: controller id for MHL control bus - * @mhl_info: template for MHL I2C device - * @hpd_gpio: GPIO for Hot-Plug-Detect pin - * - * NULL pointer for *_info fields indicates that - * the corresponding chip is not present - */ -struct s5p_hdmi_platform_data { - int hdmiphy_bus; - struct i2c_board_info *hdmiphy_info; - int mhl_bus; - struct i2c_board_info *mhl_info; - int hpd_gpio; -}; - -#endif /* S5P_HDMI_H */ -- cgit v1.2.3 From a0f831756b26f87e983dbdc291398dc097ff73b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 16:30:52 +0200 Subject: power: remove s3c adc battery driver The s3c-adc driver is removed along with the s3c24xx platform, so the battery driver is no longer needed either. Reviewed-by: Krzysztof Kozlowski Acked-by: Sebastian Reichel Signed-off-by: Arnd Bergmann --- include/linux/s3c_adc_battery.h | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 include/linux/s3c_adc_battery.h (limited to 'include/linux') diff --git a/include/linux/s3c_adc_battery.h b/include/linux/s3c_adc_battery.h deleted file mode 100644 index 57f982c375f8..000000000000 --- a/include/linux/s3c_adc_battery.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _S3C_ADC_BATTERY_H -#define _S3C_ADC_BATTERY_H - -struct s3c_adc_bat_thresh { - int volt; /* mV */ - int cur; /* mA */ - int level; /* percent */ -}; - -struct s3c_adc_bat_pdata { - int (*init)(void); - void (*exit)(void); - void (*enable_charger)(void); - void (*disable_charger)(void); - - const struct s3c_adc_bat_thresh *lut_noac; - unsigned int lut_noac_cnt; - const struct s3c_adc_bat_thresh *lut_acin; - unsigned int lut_acin_cnt; - - const unsigned int volt_channel; - const unsigned int current_channel; - const unsigned int backup_volt_channel; - - const unsigned int volt_samples; - const unsigned int current_samples; - const unsigned int backup_volt_samples; - - const unsigned int volt_mult; - const unsigned int current_mult; - const unsigned int backup_volt_mult; - const unsigned int internal_impedance; - - const unsigned int backup_volt_max; - const unsigned int backup_volt_min; -}; - -#endif -- cgit v1.2.3 From d06dd30beb25b273d477a10ea204f595144c1174 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:01:02 +0200 Subject: pata: remove samsung_cf driver This device was only used by the smdk6410 board file that is now gone, so the driver can be removed as well. Reviewed-by: Sergey Shtylyov Acked-by: Damien Le Moal Signed-off-by: Arnd Bergmann --- include/linux/platform_data/ata-samsung_cf.h | 31 ---------------------------- 1 file changed, 31 deletions(-) delete mode 100644 include/linux/platform_data/ata-samsung_cf.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ata-samsung_cf.h b/include/linux/platform_data/ata-samsung_cf.h deleted file mode 100644 index fccf969dc4da..000000000000 --- a/include/linux/platform_data/ata-samsung_cf.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Samsung CF-ATA platform_device info -*/ - -#ifndef __ATA_SAMSUNG_CF_H -#define __ATA_SAMSUNG_CF_H __FILE__ - -/** - * struct s3c_ide_platdata - S3C IDE driver platform data. - * @setup_gpio: Setup the external GPIO pins to the right state for data - * transfer in true-ide mode. - */ -struct s3c_ide_platdata { - void (*setup_gpio)(void); -}; - -/* - * s3c_ide_set_platdata() - Setup the platform specifc data for IDE driver. - * @pdata: Platform data for IDE driver. - */ -extern void s3c_ide_set_platdata(struct s3c_ide_platdata *pdata); - -/* architecture-specific IDE configuration */ -extern void s3c64xx_ide_setup_gpio(void); -extern void s5pv210_ide_setup_gpio(void); - -#endif /*__ATA_SAMSUNG_CF_H */ -- cgit v1.2.3 From 0b14558977a777780bb18c2e86609cb53fe3d05a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:08:58 +0200 Subject: mmc: remove s3cmci driver The s3c24xx platform is gone, so this driver can be removed as well. Reviewed-by: Krzysztof Kozlowski Acked-by: Ulf Hansson Signed-off-by: Arnd Bergmann --- include/linux/platform_data/mmc-s3cmci.h | 51 -------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 include/linux/platform_data/mmc-s3cmci.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-s3cmci.h b/include/linux/platform_data/mmc-s3cmci.h deleted file mode 100644 index bacb86db3112..000000000000 --- a/include/linux/platform_data/mmc-s3cmci.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ARCH_MCI_H -#define _ARCH_MCI_H - -/** - * struct s3c24xx_mci_pdata - sd/mmc controller platform data - * @no_wprotect: Set this to indicate there is no write-protect switch. - * @no_detect: Set this if there is no detect switch. - * @wprotect_invert: Invert the default sense of the write protect switch. - * @use_dma: Set to allow the use of DMA. - * @gpio_detect: GPIO number for the card detect line. - * @gpio_wprotect: GPIO number for the write protect line. - * @ocr_avail: The mask of the available power states, non-zero to use. - * @set_power: Callback to control the power mode. - * - * The @gpio_detect is used for card detection when @no_wprotect is unset, - * and the default sense is that 0 returned from gpio_get_value() means - * that a card is inserted. If @detect_invert is set, then the value from - * gpio_get_value() is inverted, which makes 1 mean card inserted. - * - * The driver will use @gpio_wprotect to signal whether the card is write - * protected if @no_wprotect is not set. A 0 returned from gpio_get_value() - * means the card is read/write, and 1 means read-only. The @wprotect_invert - * will invert the value returned from gpio_get_value(). - * - * Card power is set by @ocr_availa, using MCC_VDD_ constants if it is set - * to a non-zero value, otherwise the default of 3.2-3.4V is used. - */ -struct s3c24xx_mci_pdata { - unsigned int no_wprotect:1; - unsigned int no_detect:1; - unsigned int wprotect_invert:1; - unsigned int use_dma:1; - - unsigned long ocr_avail; - void (*set_power)(unsigned char power_mode, - unsigned short vdd); - struct gpio_desc *bus[6]; -}; - -/** - * s3c24xx_mci_set_platdata - set platform data for mmc/sdi device - * @pdata: The platform data - * - * Copy the platform data supplied by @pdata so that this can be marked - * __initdata. - */ -extern void s3c24xx_mci_def_set_power(unsigned char power_mode, unsigned short vdd); -extern void s3c24xx_mci_set_platdata(struct s3c24xx_mci_pdata *pdata); - -#endif /* _ARCH_NCI_H */ -- cgit v1.2.3 From 594b3caeaf7938ce49a920b8fc835a67216ada28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:11:14 +0200 Subject: clk: remove s3c24xx driver The s3c24xx platform is gone, so the clk driver can be removed as well. Acked-by: Stephen Boyd Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/platform_data/clk-s3c2410.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/platform_data/clk-s3c2410.h (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-s3c2410.h b/include/linux/platform_data/clk-s3c2410.h deleted file mode 100644 index 7eb1cfa5409b..000000000000 --- a/include/linux/platform_data/clk-s3c2410.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2020 Krzysztof Kozlowski - */ - -#ifndef __LINUX_PLATFORM_DATA_CLK_S3C2410_H_ -#define __LINUX_PLATFORM_DATA_CLK_S3C2410_H_ - -/** - * struct s3c2410_clk_platform_data - platform data for S3C2410 clock driver - * - * @modify_misccr: Function to modify the MISCCR and return the new value - */ -struct s3c2410_clk_platform_data { - unsigned int (*modify_misccr)(unsigned int clr, unsigned int chg); -}; - -#endif /* __LINUX_PLATFORM_DATA_CLK_S3C2410_H_ */ - -- cgit v1.2.3 From 7d1ec119e3c1ec060370f5dc58490b51368c554c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:18:46 +0200 Subject: leds: remove s3c24xx driver The s3c24xx platform is gone, so the led driver can be removed as well. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/platform_data/leds-s3c24xx.h | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 include/linux/platform_data/leds-s3c24xx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-s3c24xx.h b/include/linux/platform_data/leds-s3c24xx.h deleted file mode 100644 index 64f8d14876e0..000000000000 --- a/include/linux/platform_data/leds-s3c24xx.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2006 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * S3C24XX - LEDs GPIO connector -*/ - -#ifndef __LEDS_S3C24XX_H -#define __LEDS_S3C24XX_H - -struct s3c24xx_led_platdata { - char *name; - char *def_trigger; -}; - -#endif /* __LEDS_S3C24XX_H */ -- cgit v1.2.3 From 1fa774f706a9cfa68bc7dea9e369be3af86fac93 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:20:47 +0200 Subject: usb: gadget: remove s3c24xx drivers The s3c24xx platform is gone, so both the udc and hsudc drivers can be removed as well. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/platform_data/s3c-hsudc.h | 33 --------------------------- include/linux/platform_data/usb-s3c2410_udc.h | 33 --------------------------- 2 files changed, 66 deletions(-) delete mode 100644 include/linux/platform_data/s3c-hsudc.h delete mode 100644 include/linux/platform_data/usb-s3c2410_udc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/s3c-hsudc.h b/include/linux/platform_data/s3c-hsudc.h deleted file mode 100644 index a170939832d5..000000000000 --- a/include/linux/platform_data/s3c-hsudc.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * S3C24XX USB 2.0 High-speed USB controller gadget driver - * - * Copyright (c) 2010 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * The S3C24XX USB 2.0 high-speed USB controller supports upto 9 endpoints. - * Each endpoint can be configured as either in or out endpoint. Endpoints - * can be configured for Bulk or Interrupt transfer mode. -*/ - -#ifndef __LINUX_USB_S3C_HSUDC_H -#define __LINUX_USB_S3C_HSUDC_H - -/** - * s3c24xx_hsudc_platdata - Platform data for USB High-Speed gadget controller. - * @epnum: Number of endpoints to be instantiated by the controller driver. - * @gpio_init: Platform specific USB related GPIO initialization. - * @gpio_uninit: Platform specific USB releted GPIO uninitialzation. - * - * Representation of platform data for the S3C24XX USB 2.0 High Speed gadget - * controllers. - */ -struct s3c24xx_hsudc_platdata { - unsigned int epnum; - void (*gpio_init)(void); - void (*gpio_uninit)(void); - void (*phy_init)(void); - void (*phy_uninit)(void); -}; - -#endif /* __LINUX_USB_S3C_HSUDC_H */ diff --git a/include/linux/platform_data/usb-s3c2410_udc.h b/include/linux/platform_data/usb-s3c2410_udc.h deleted file mode 100644 index c0fbe1fe3426..000000000000 --- a/include/linux/platform_data/usb-s3c2410_udc.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/plat-samsung/include/plat/udc.h - * - * Copyright (c) 2005 Arnaud Patard - * - * Changelog: - * 14-Mar-2005 RTP Created file - * 02-Aug-2005 RTP File rename - * 07-Sep-2005 BJD Minor cleanups, changed cmd to enum - * 18-Jan-2007 HMW Add per-platform vbus_draw function -*/ - -#ifndef __ASM_ARM_ARCH_UDC_H -#define __ASM_ARM_ARCH_UDC_H - -enum s3c2410_udc_cmd_e { - S3C2410_UDC_P_ENABLE = 1, /* Pull-up enable */ - S3C2410_UDC_P_DISABLE = 2, /* Pull-up disable */ - S3C2410_UDC_P_RESET = 3, /* UDC reset, in case of */ -}; - -struct s3c2410_udc_mach_info { - void (*udc_command)(enum s3c2410_udc_cmd_e); - void (*vbus_draw)(unsigned int ma); -}; - -extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *); - -struct s3c24xx_hsudc_platdata; - -extern void __init s3c24xx_hsudc_set_platdata(struct s3c24xx_hsudc_platdata *pd); - -#endif /* __ASM_ARM_ARCH_UDC_H */ -- cgit v1.2.3 From a7ddf74b784b57ec7c93b780ec8236dc6aa4db02 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Oct 2022 22:27:47 +0200 Subject: dmaengine: remove s3c24xx driver The s3c24xx platform was removed and this driver is no longer needed. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/platform_data/dma-s3c24xx.h | 48 ------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 include/linux/platform_data/dma-s3c24xx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h deleted file mode 100644 index 96d02dbeea67..000000000000 --- a/include/linux/platform_data/dma-s3c24xx.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * S3C24XX DMA handling - * - * Copyright (c) 2013 Heiko Stuebner - */ - -/* Helper to encode the source selection constraints for early s3c socs. */ -#define S3C24XX_DMA_CHANREQ(src, chan) ((BIT(3) | src) << chan * 4) - -enum s3c24xx_dma_bus { - S3C24XX_DMA_APB, - S3C24XX_DMA_AHB, -}; - -/** - * @bus: on which bus does the peripheral reside - AHB or APB. - * @handshake: is a handshake with the peripheral necessary - * @chansel: channel selection information, depending on variant; reqsel for - * s3c2443 and later and channel-selection map for earlier SoCs - * see CHANSEL doc in s3c2443-dma.c - */ -struct s3c24xx_dma_channel { - enum s3c24xx_dma_bus bus; - bool handshake; - u16 chansel; -}; - -struct dma_slave_map; - -/** - * struct s3c24xx_dma_platdata - platform specific settings - * @num_phy_channels: number of physical channels - * @channels: array of virtual channel descriptions - * @num_channels: number of virtual channels - * @slave_map: dma slave map matching table - * @slavecnt: number of elements in slave_map - */ -struct s3c24xx_dma_platdata { - int num_phy_channels; - struct s3c24xx_dma_channel *channels; - int num_channels; - const struct dma_slave_map *slave_map; - int slavecnt; -}; - -struct dma_chan; -bool s3c24xx_dma_filter(struct dma_chan *chan, void *param); -- cgit v1.2.3 From 014e79d7eccea9fe77da891fa04cde75db0af9c9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:41:15 +0200 Subject: cpufreq: remove s3c24xx drivers All s3c24xx platforms were removed, so these five drivers are all obsolete now. Reviewed-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Signed-off-by: Arnd Bergmann --- include/linux/soc/samsung/s3c-cpufreq-core.h | 299 --------------------------- 1 file changed, 299 deletions(-) delete mode 100644 include/linux/soc/samsung/s3c-cpufreq-core.h (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-cpufreq-core.h b/include/linux/soc/samsung/s3c-cpufreq-core.h deleted file mode 100644 index 3b278afb769b..000000000000 --- a/include/linux/soc/samsung/s3c-cpufreq-core.h +++ /dev/null @@ -1,299 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2006-2009 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * S3C CPU frequency scaling support - core support - */ -#ifndef __LINUX_SOC_SAMSUNG_S3C_CPUFREQ_CORE_H -#define __LINUX_SOC_SAMSUNG_S3C_CPUFREQ_CORE_H - -#include - -struct seq_file; - -#define MAX_BANKS (8) -#define S3C2412_MAX_IO (8) - -/** - * struct s3c2410_iobank_timing - IO bank timings for S3C2410 style timings - * @bankcon: The cached version of settings in this structure. - * @tacp: - * @tacs: Time from address valid to nCS asserted. - * @tcos: Time from nCS asserted to nOE or nWE asserted. - * @tacc: Time that nOE or nWE is asserted. - * @tcoh: Time nCS is held after nOE or nWE are released. - * @tcah: Time address is held for after - * @nwait_en: Whether nWAIT is enabled for this bank. - * - * This structure represents the IO timings for a S3C2410 style IO bank - * used by the CPU frequency support if it needs to change the settings - * of the IO. - */ -struct s3c2410_iobank_timing { - unsigned long bankcon; - unsigned int tacp; - unsigned int tacs; - unsigned int tcos; - unsigned int tacc; - unsigned int tcoh; /* nCS hold after nOE/nWE */ - unsigned int tcah; /* Address hold after nCS */ - unsigned char nwait_en; /* nWait enabled for bank. */ -}; - -/** - * struct s3c2412_iobank_timing - io timings for PL092 (S3C2412) style IO - * @idcy: The idle cycle time between transactions. - * @wstrd: nCS release to end of read cycle. - * @wstwr: nCS release to end of write cycle. - * @wstoen: nCS assertion to nOE assertion time. - * @wstwen: nCS assertion to nWE assertion time. - * @wstbrd: Burst ready delay. - * @smbidcyr: Register cache for smbidcyr value. - * @smbwstrd: Register cache for smbwstrd value. - * @smbwstwr: Register cache for smbwstwr value. - * @smbwstoen: Register cache for smbwstoen value. - * @smbwstwen: Register cache for smbwstwen value. - * @smbwstbrd: Register cache for smbwstbrd value. - * - * Timing information for a IO bank on an S3C2412 or similar system which - * uses a PL093 block. - */ -struct s3c2412_iobank_timing { - unsigned int idcy; - unsigned int wstrd; - unsigned int wstwr; - unsigned int wstoen; - unsigned int wstwen; - unsigned int wstbrd; - - /* register cache */ - unsigned char smbidcyr; - unsigned char smbwstrd; - unsigned char smbwstwr; - unsigned char smbwstoen; - unsigned char smbwstwen; - unsigned char smbwstbrd; -}; - -union s3c_iobank { - struct s3c2410_iobank_timing *io_2410; - struct s3c2412_iobank_timing *io_2412; -}; - -/** - * struct s3c_iotimings - Chip IO timings holder - * @bank: The timings for each IO bank. - */ -struct s3c_iotimings { - union s3c_iobank bank[MAX_BANKS]; -}; - -/** - * struct s3c_plltab - PLL table information. - * @vals: List of PLL values. - * @size: Size of the PLL table @vals. - */ -struct s3c_plltab { - struct s3c_pllval *vals; - int size; -}; - -/** - * struct s3c_cpufreq_config - current cpu frequency configuration - * @freq: The current settings for the core clocks. - * @max: Maxium settings, derived from core, board and user settings. - * @pll: The PLL table entry for the current PLL settings. - * @divs: The divisor settings for the core clocks. - * @info: The current core driver information. - * @board: The information for the board we are running on. - * @lock_pll: Set if the PLL settings cannot be changed. - * - * This is for the core drivers that need to know information about - * the current settings and values. It should not be needed by any - * device drivers. -*/ -struct s3c_cpufreq_config { - struct s3c_freq freq; - struct s3c_freq max; - struct clk *mpll; - struct cpufreq_frequency_table pll; - struct s3c_clkdivs divs; - struct s3c_cpufreq_info *info; /* for core, not drivers */ - struct s3c_cpufreq_board *board; - - unsigned int lock_pll:1; -}; - -/** - * struct s3c_cpufreq_info - Information for the CPU frequency driver. - * @name: The name of this implementation. - * @max: The maximum frequencies for the system. - * @latency: Transition latency to give to cpufreq. - * @locktime_m: The lock-time in uS for the MPLL. - * @locktime_u: The lock-time in uS for the UPLL. - * @locttime_bits: The number of bits each LOCKTIME field. - * @need_pll: Set if this driver needs to change the PLL values to achieve - * any frequency changes. This is really only need by devices like the - * S3C2410 where there is no or limited divider between the PLL and the - * ARMCLK. - * @get_iotiming: Get the current IO timing data, mainly for use at start. - * @set_iotiming: Update the IO timings from the cached copies calculated - * from the @calc_iotiming entry when changing the frequency. - * @calc_iotiming: Calculate and update the cached copies of the IO timings - * from the newly calculated frequencies. - * @calc_freqtable: Calculate (fill in) the given frequency table from the - * current frequency configuration. If the table passed in is NULL, - * then the return is the number of elements to be filled for allocation - * of the table. - * @set_refresh: Set the memory refresh configuration. - * @set_fvco: Set the PLL frequencies. - * @set_divs: Update the clock divisors. - * @calc_divs: Calculate the clock divisors. - */ -struct s3c_cpufreq_info { - const char *name; - struct s3c_freq max; - - unsigned int latency; - - unsigned int locktime_m; - unsigned int locktime_u; - unsigned char locktime_bits; - - unsigned int need_pll:1; - - /* driver routines */ - - int (*get_iotiming)(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *timings); - - void (*set_iotiming)(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *timings); - - int (*calc_iotiming)(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *timings); - - int (*calc_freqtable)(struct s3c_cpufreq_config *cfg, - struct cpufreq_frequency_table *t, - size_t table_size); - - void (*debug_io_show)(struct seq_file *seq, - struct s3c_cpufreq_config *cfg, - union s3c_iobank *iob); - - void (*set_refresh)(struct s3c_cpufreq_config *cfg); - void (*set_fvco)(struct s3c_cpufreq_config *cfg); - void (*set_divs)(struct s3c_cpufreq_config *cfg); - int (*calc_divs)(struct s3c_cpufreq_config *cfg); -}; - -extern int s3c_cpufreq_register(struct s3c_cpufreq_info *info); - -extern int s3c_plltab_register(struct cpufreq_frequency_table *plls, - unsigned int plls_no); - -/* exports and utilities for debugfs */ -extern struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void); -extern struct s3c_iotimings *s3c_cpufreq_getiotimings(void); - -#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS -#define s3c_cpufreq_debugfs_call(x) x -#else -#define s3c_cpufreq_debugfs_call(x) NULL -#endif - -/* Useful utility functions. */ - -extern struct clk *s3c_cpufreq_clk_get(struct device *, const char *); - -/* S3C2410 and compatible exported functions */ - -extern void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg); -extern void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg); - -#ifdef CONFIG_S3C2410_IOTIMING -extern void s3c2410_iotiming_debugfs(struct seq_file *seq, - struct s3c_cpufreq_config *cfg, - union s3c_iobank *iob); - -extern int s3c2410_iotiming_calc(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *iot); - -extern int s3c2410_iotiming_get(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *timings); - -extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *iot); -#else -#define s3c2410_iotiming_debugfs NULL -#define s3c2410_iotiming_calc NULL -#define s3c2410_iotiming_get NULL -#define s3c2410_iotiming_set NULL -#endif /* CONFIG_S3C2410_IOTIMING */ - -/* S3C2412 compatible routines */ - -#ifdef CONFIG_S3C2412_IOTIMING -extern void s3c2412_iotiming_debugfs(struct seq_file *seq, - struct s3c_cpufreq_config *cfg, - union s3c_iobank *iob); - -extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *timings); - -extern int s3c2412_iotiming_calc(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *iot); - -extern void s3c2412_iotiming_set(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *iot); -extern void s3c2412_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg); -#else -#define s3c2412_iotiming_debugfs NULL -#define s3c2412_iotiming_calc NULL -#define s3c2412_iotiming_get NULL -#define s3c2412_iotiming_set NULL -#endif /* CONFIG_S3C2412_IOTIMING */ - -#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_DEBUG -#define s3c_freq_dbg(x...) printk(KERN_INFO x) -#else -#define s3c_freq_dbg(x...) do { if (0) printk(x); } while (0) -#endif /* CONFIG_ARM_S3C24XX_CPUFREQ_DEBUG */ - -#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_IODEBUG -#define s3c_freq_iodbg(x...) printk(KERN_INFO x) -#else -#define s3c_freq_iodbg(x...) do { if (0) printk(x); } while (0) -#endif /* CONFIG_ARM_S3C24XX_CPUFREQ_IODEBUG */ - -static inline int s3c_cpufreq_addfreq(struct cpufreq_frequency_table *table, - int index, size_t table_size, - unsigned int freq) -{ - if (index < 0) - return index; - - if (table) { - if (index >= table_size) - return -ENOMEM; - - s3c_freq_dbg("%s: { %d = %u kHz }\n", - __func__, index, freq); - - table[index].driver_data = index; - table[index].frequency = freq; - } - - return index + 1; -} - -u32 s3c2440_read_camdivn(void); -void s3c2440_write_camdivn(u32 camdiv); -u32 s3c24xx_read_clkdivn(void); -void s3c24xx_write_clkdivn(u32 clkdiv); -u32 s3c24xx_read_mpllcon(void); -void s3c24xx_write_locktime(u32 locktime); - -#endif -- cgit v1.2.3 From f48fd50b03d2482dbcc2e672bdcf147984b5d955 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 15:19:55 +0200 Subject: fbdev: remove s3c2410 framebuffer The s3c24xx platform was removed, so the framebuffer driver is no longer needed. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/platform_data/fb-s3c2410.h | 99 -------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 include/linux/platform_data/fb-s3c2410.h (limited to 'include/linux') diff --git a/include/linux/platform_data/fb-s3c2410.h b/include/linux/platform_data/fb-s3c2410.h deleted file mode 100644 index 10c11e6316d6..000000000000 --- a/include/linux/platform_data/fb-s3c2410.h +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2004 Arnaud Patard - * - * Inspired by pxafb.h -*/ - -#ifndef __ASM_PLAT_FB_S3C2410_H -#define __ASM_PLAT_FB_S3C2410_H __FILE__ - -#include - -struct s3c2410fb_hw { - unsigned long lcdcon1; - unsigned long lcdcon2; - unsigned long lcdcon3; - unsigned long lcdcon4; - unsigned long lcdcon5; -}; - -/* LCD description */ -struct s3c2410fb_display { - /* LCD type */ - unsigned type; -#define S3C2410_LCDCON1_DSCAN4 (0<<5) -#define S3C2410_LCDCON1_STN4 (1<<5) -#define S3C2410_LCDCON1_STN8 (2<<5) -#define S3C2410_LCDCON1_TFT (3<<5) - -#define S3C2410_LCDCON1_TFT1BPP (8<<1) -#define S3C2410_LCDCON1_TFT2BPP (9<<1) -#define S3C2410_LCDCON1_TFT4BPP (10<<1) -#define S3C2410_LCDCON1_TFT8BPP (11<<1) -#define S3C2410_LCDCON1_TFT16BPP (12<<1) -#define S3C2410_LCDCON1_TFT24BPP (13<<1) - - /* Screen size */ - unsigned short width; - unsigned short height; - - /* Screen info */ - unsigned short xres; - unsigned short yres; - unsigned short bpp; - - unsigned pixclock; /* pixclock in picoseconds */ - unsigned short left_margin; /* value in pixels (TFT) or HCLKs (STN) */ - unsigned short right_margin; /* value in pixels (TFT) or HCLKs (STN) */ - unsigned short hsync_len; /* value in pixels (TFT) or HCLKs (STN) */ - unsigned short upper_margin; /* value in lines (TFT) or 0 (STN) */ - unsigned short lower_margin; /* value in lines (TFT) or 0 (STN) */ - unsigned short vsync_len; /* value in lines (TFT) or 0 (STN) */ - - /* lcd configuration registers */ - unsigned long lcdcon5; -#define S3C2410_LCDCON5_BPP24BL (1<<12) -#define S3C2410_LCDCON5_FRM565 (1<<11) -#define S3C2410_LCDCON5_INVVCLK (1<<10) -#define S3C2410_LCDCON5_INVVLINE (1<<9) -#define S3C2410_LCDCON5_INVVFRAME (1<<8) -#define S3C2410_LCDCON5_INVVD (1<<7) -#define S3C2410_LCDCON5_INVVDEN (1<<6) -#define S3C2410_LCDCON5_INVPWREN (1<<5) -#define S3C2410_LCDCON5_INVLEND (1<<4) -#define S3C2410_LCDCON5_PWREN (1<<3) -#define S3C2410_LCDCON5_ENLEND (1<<2) -#define S3C2410_LCDCON5_BSWP (1<<1) -#define S3C2410_LCDCON5_HWSWP (1<<0) -}; - -struct s3c2410fb_mach_info { - - struct s3c2410fb_display *displays; /* attached displays info */ - unsigned num_displays; /* number of defined displays */ - unsigned default_display; - - /* GPIOs */ - - unsigned long gpcup; - unsigned long gpcup_mask; - unsigned long gpccon; - unsigned long gpccon_mask; - unsigned long gpdup; - unsigned long gpdup_mask; - unsigned long gpdcon; - unsigned long gpdcon_mask; - - void __iomem * gpccon_reg; - void __iomem * gpcup_reg; - void __iomem * gpdcon_reg; - void __iomem * gpdup_reg; - - /* lpc3600 control register */ - unsigned long lpcsel; -}; - -extern void s3c24xx_fb_set_platdata(struct s3c2410fb_mach_info *); - -#endif /* __ASM_PLAT_FB_S3C2410_H */ -- cgit v1.2.3 From c9dcd56feb820d50d1bbd52c135caa4e628e4762 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:22:04 +0200 Subject: spi: remove s3c24xx driver The s3c24xx platform was removed,s o there are no remaining users for its spi driver. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/spi/s3c24xx-fiq.h | 33 --------------------------------- include/linux/spi/s3c24xx.h | 20 -------------------- 2 files changed, 53 deletions(-) delete mode 100644 include/linux/spi/s3c24xx-fiq.h delete mode 100644 include/linux/spi/s3c24xx.h (limited to 'include/linux') diff --git a/include/linux/spi/s3c24xx-fiq.h b/include/linux/spi/s3c24xx-fiq.h deleted file mode 100644 index d2842ac1de27..000000000000 --- a/include/linux/spi/s3c24xx-fiq.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* linux/drivers/spi/spi_s3c24xx_fiq.h - * - * Copyright 2009 Simtec Electronics - * Ben Dooks - * - * S3C24XX SPI - FIQ pseudo-DMA transfer support -*/ - -#ifndef __LINUX_SPI_S3C24XX_FIQ_H -#define __LINUX_SPI_S3C24XX_FIQ_H __FILE__ - -/* We have R8 through R13 to play with */ - -#ifdef __ASSEMBLY__ -#define __REG_NR(x) r##x -#else - -extern struct spi_fiq_code s3c24xx_spi_fiq_txrx; -extern struct spi_fiq_code s3c24xx_spi_fiq_tx; -extern struct spi_fiq_code s3c24xx_spi_fiq_rx; - -#define __REG_NR(x) (x) -#endif - -#define fiq_rspi __REG_NR(8) -#define fiq_rtmp __REG_NR(9) -#define fiq_rrx __REG_NR(10) -#define fiq_rtx __REG_NR(11) -#define fiq_rcount __REG_NR(12) -#define fiq_rirq __REG_NR(13) - -#endif /* __LINUX_SPI_S3C24XX_FIQ_H */ diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h deleted file mode 100644 index 9b8bb22d5b0c..000000000000 --- a/include/linux/spi/s3c24xx.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2006 Simtec Electronics - * Ben Dooks - * - * S3C2410 - SPI Controller platform_device info -*/ - -#ifndef __LINUX_SPI_S3C24XX_H -#define __LINUX_SPI_S3C24XX_H __FILE__ - -struct s3c2410_spi_info { - unsigned int num_cs; /* total chipselects */ - int bus_num; /* bus number to use. */ - unsigned int use_fiq:1; /* use fiq */ -}; - -extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on); - -#endif /* __LINUX_SPI_S3C24XX_H */ -- cgit v1.2.3 From 503278c12701831e4be18a3e5b95cd087d6f7542 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:50:51 +0200 Subject: ASoC: samsung: remove unused drivers The s3c24xx SoC platform was completely removed, as were most of the s3c64xx based board files, leaving only the DT based machines as well as the MACH_WLF_CRAGG_6410 machine. All other board specific ASoC driver can can now be recycled. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/platform_data/asoc-s3c24xx_simtec.h | 30 ----------------------- 1 file changed, 30 deletions(-) delete mode 100644 include/linux/platform_data/asoc-s3c24xx_simtec.h (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-s3c24xx_simtec.h b/include/linux/platform_data/asoc-s3c24xx_simtec.h deleted file mode 100644 index 1a7efc98d108..000000000000 --- a/include/linux/platform_data/asoc-s3c24xx_simtec.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2008 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * Simtec Audio support. -*/ - -/** - * struct s3c24xx_audio_simtec_pdata - platform data for simtec audio - * @use_mpllin: Select codec clock from MPLLin - * @output_cdclk: Need to output CDCLK to the codec - * @have_mic: Set if we have a MIC socket - * @have_lout: Set if we have a LineOut socket - * @amp_gpio: GPIO pin to enable the AMP - * @amp_gain: Option GPIO to control AMP gain - */ -struct s3c24xx_audio_simtec_pdata { - unsigned int use_mpllin:1; - unsigned int output_cdclk:1; - - unsigned int have_mic:1; - unsigned int have_lout:1; - - int amp_gpio; - int amp_gain[2]; - - void (*startup)(void); -}; -- cgit v1.2.3 From eede0a75b96c7cfc3a61a4b01dd3674e8742039f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Oct 2022 17:33:20 +0200 Subject: parport: remove ax88796 driver The s3c24xx bast platform was removed, so this driver has no users any more and can be removed as well. Acked-by: Sudip Mukherjee Signed-off-by: Arnd Bergmann --- include/linux/parport.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index 1c16ffb8b908..a0bc9e0267b7 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -40,10 +40,6 @@ struct amiga_parport_state { unsigned char statusdir;/* ciab.ddrb & 7 */ }; -struct ax88796_parport_state { - unsigned char cpr; -}; - struct ip32_parport_state { unsigned int dcr; unsigned int ecr; @@ -55,7 +51,6 @@ struct parport_state { /* ARC has no state. */ struct ax_parport_state ax; struct amiga_parport_state amiga; - struct ax88796_parport_state ax88796; /* Atari has not state. */ struct ip32_parport_state ip32; void *misc; -- cgit v1.2.3 From 7b3c4c370c09313e22b555e79167e73d233611d1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 16 Jan 2023 12:15:09 +0100 Subject: regmap: Rework regmap_mdio_c45_{read|write} for new C45 API. The MDIO subsystem is getting rid of MII_ADDR_C45 and thus also encoding associated encoding of the C45 device address and register address into one value. regmap-mdio also uses this encoding for the C45 bus. Move to the new C45 helpers for MDIO access and provide regmap-mdio helper macros. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20230116111509.4086236-1-michael@walle.cc Signed-off-by: Mark Brown --- include/linux/regmap.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index a3bc695bcca0..029b9e09d3ca 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -38,6 +38,14 @@ struct regmap_field; struct snd_ac97; struct sdw_slave; +/* + * regmap_mdio address encoding. IEEE 802.3ae clause 45 addresses consist of a + * device address and a register address. + */ +#define REGMAP_MDIO_C45_DEVAD_SHIFT 16 +#define REGMAP_MDIO_C45_DEVAD_MASK GENMASK(20, 16) +#define REGMAP_MDIO_C45_REGNUM_MASK GENMASK(15, 0) + /* An enum of all the supported cache types */ enum regcache_type { REGCACHE_NONE, -- cgit v1.2.3 From 71dc9ec9ac7d3eee785cdc986c3daeb821381e20 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Fri, 13 Jan 2023 22:21:37 +0000 Subject: virtio/vsock: replace virtio_vsock_pkt with sk_buff This commit changes virtio/vsock to use sk_buff instead of virtio_vsock_pkt. Beyond better conforming to other net code, using sk_buff allows vsock to use sk_buff-dependent features in the future (such as sockmap) and improves throughput. This patch introduces the following performance changes: Tool: Uperf Env: Phys Host + L1 Guest Payload: 64k Threads: 16 Test Runs: 10 Type: SOCK_STREAM Before: commit b7bfaa761d760 ("Linux 6.2-rc3") Before ------ g2h: 16.77Gb/s h2g: 10.56Gb/s After ----- g2h: 21.04Gb/s h2g: 10.76Gb/s Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 129 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 109 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 35d7eedb5e8e..3f9c16611306 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -7,6 +7,109 @@ #include #include +#define VIRTIO_VSOCK_SKB_HEADROOM (sizeof(struct virtio_vsock_hdr)) + +struct virtio_vsock_skb_cb { + bool reply; + bool tap_delivered; +}; + +#define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) + +static inline struct virtio_vsock_hdr *virtio_vsock_hdr(struct sk_buff *skb) +{ + return (struct virtio_vsock_hdr *)skb->head; +} + +static inline bool virtio_vsock_skb_reply(struct sk_buff *skb) +{ + return VIRTIO_VSOCK_SKB_CB(skb)->reply; +} + +static inline void virtio_vsock_skb_set_reply(struct sk_buff *skb) +{ + VIRTIO_VSOCK_SKB_CB(skb)->reply = true; +} + +static inline bool virtio_vsock_skb_tap_delivered(struct sk_buff *skb) +{ + return VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered; +} + +static inline void virtio_vsock_skb_set_tap_delivered(struct sk_buff *skb) +{ + VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = true; +} + +static inline void virtio_vsock_skb_clear_tap_delivered(struct sk_buff *skb) +{ + VIRTIO_VSOCK_SKB_CB(skb)->tap_delivered = false; +} + +static inline void virtio_vsock_skb_rx_put(struct sk_buff *skb) +{ + u32 len; + + len = le32_to_cpu(virtio_vsock_hdr(skb)->len); + + if (len > 0) + skb_put(skb, len); +} + +static inline struct sk_buff *virtio_vsock_alloc_skb(unsigned int size, gfp_t mask) +{ + struct sk_buff *skb; + + if (size < VIRTIO_VSOCK_SKB_HEADROOM) + return NULL; + + skb = alloc_skb(size, mask); + if (!skb) + return NULL; + + skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM); + return skb; +} + +static inline void +virtio_vsock_skb_queue_head(struct sk_buff_head *list, struct sk_buff *skb) +{ + spin_lock_bh(&list->lock); + __skb_queue_head(list, skb); + spin_unlock_bh(&list->lock); +} + +static inline void +virtio_vsock_skb_queue_tail(struct sk_buff_head *list, struct sk_buff *skb) +{ + spin_lock_bh(&list->lock); + __skb_queue_tail(list, skb); + spin_unlock_bh(&list->lock); +} + +static inline struct sk_buff *virtio_vsock_skb_dequeue(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + spin_lock_bh(&list->lock); + skb = __skb_dequeue(list); + spin_unlock_bh(&list->lock); + + return skb; +} + +static inline void virtio_vsock_skb_queue_purge(struct sk_buff_head *list) +{ + spin_lock_bh(&list->lock); + __skb_queue_purge(list); + spin_unlock_bh(&list->lock); +} + +static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) +{ + return (size_t)(skb_end_pointer(skb) - skb->head); +} + #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) @@ -35,23 +138,10 @@ struct virtio_vsock_sock { u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; - struct list_head rx_queue; + struct sk_buff_head rx_queue; u32 msg_count; }; -struct virtio_vsock_pkt { - struct virtio_vsock_hdr hdr; - struct list_head list; - /* socket refcnt not held, only use for cancellation */ - struct vsock_sock *vsk; - void *buf; - u32 buf_len; - u32 len; - u32 off; - bool reply; - bool tap_delivered; -}; - struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; struct vsock_sock *vsk; @@ -68,7 +158,7 @@ struct virtio_transport { struct vsock_transport transport; /* Takes ownership of the packet */ - int (*send_pkt)(struct virtio_vsock_pkt *pkt); + int (*send_pkt)(struct sk_buff *skb); }; ssize_t @@ -149,11 +239,10 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, void virtio_transport_destruct(struct vsock_sock *vsk); void virtio_transport_recv_pkt(struct virtio_transport *t, - struct virtio_vsock_pkt *pkt); -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); + struct sk_buff *skb); +void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); -void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt); - +void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); +int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); #endif /* _LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From 373c612d72461ddaea223592df31e62c934aae61 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 11 Jan 2023 10:54:21 +0000 Subject: i2c: add fwnode APIs Add fwnode APIs for finding and getting I2C adapters, which will be used by the SFP code. These are passed the fwnode corresponding to the adapter, and return the I2C adapter. It is the responsibility of the caller to find the appropriate fwnode. We keep the DT and ACPI interfaces, but where appropriate, recode them to use the fwnode interfaces internally. Reviewed-by: Mika Westerberg Signed-off-by: Russell King (Oracle) Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index d84e0e99f084..500404d85141 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -965,15 +965,33 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); #endif /* I2C */ +/* must call put_device() when done with returned i2c_client device */ +struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode); + +/* must call put_device() when done with returned i2c_adapter device */ +struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode); + +/* must call i2c_put_adapter() when done with returned i2c_adapter device */ +struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode); + #if IS_ENABLED(CONFIG_OF) /* must call put_device() when done with returned i2c_client device */ -struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); +static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) +{ + return i2c_find_device_by_fwnode(of_fwnode_handle(node)); +} /* must call put_device() when done with returned i2c_adapter device */ -struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node); +static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node) +{ + return i2c_find_adapter_by_fwnode(of_fwnode_handle(node)); +} /* must call i2c_put_adapter() when done with returned i2c_adapter device */ -struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node); +static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) +{ + return i2c_get_adapter_by_fwnode(of_fwnode_handle(node)); +} const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, -- cgit v1.2.3 From 08a764a7c51b01ccf8594a36b8d24b2d643bc5ed Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 14 Jan 2023 18:01:32 +0100 Subject: net: ethernet: mtk_wed: add reset/reset_complete callbacks Introduce reset and reset_complete wlan callback to schedule WLAN driver reset when ethernet/wed driver is resetting. Tested-by: Daniel Golle Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index a0746d4aec20..b939bd2750a0 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -151,6 +151,8 @@ struct mtk_wed_device { void (*release_rx_buf)(struct mtk_wed_device *wed); void (*update_wo_rx_stats)(struct mtk_wed_device *wed, struct mtk_wed_wo_rx_stats *stats); + int (*reset)(struct mtk_wed_device *wed); + void (*reset_complete)(struct mtk_wed_device *wed); } wlan; #endif }; -- cgit v1.2.3 From 8c3e24065e3d37ee4787e6c72aa0ab6460db7598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:45 +0000 Subject: HID: usbhid: Make hid_is_usb() non-inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By making hid_is_usb() a non-inline function the lowlevel usbhid driver does not have to be exported anymore. Also mark the argument as const as it is not modified. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 1eb5408599cd..c4cf9cea4377 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -866,10 +866,7 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } -static inline bool hid_is_usb(struct hid_device *hdev) -{ - return hid_is_using_ll_driver(hdev, &usb_hid_driver); -} +extern bool hid_is_usb(const struct hid_device *hdev); #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 -- cgit v1.2.3 From 1d9ca84ce034960707b62ce7d5ca75d04f8db91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:46 +0000 Subject: HID: Remove unused function hid_is_using_ll_driver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the last user was removed we can delete this function. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index c4cf9cea4377..4dec31e8eadb 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -860,12 +860,6 @@ extern struct hid_ll_driver hidp_hid_driver; extern struct hid_ll_driver uhid_hid_driver; extern struct hid_ll_driver usb_hid_driver; -static inline bool hid_is_using_ll_driver(struct hid_device *hdev, - struct hid_ll_driver *driver) -{ - return hdev->ll_driver == driver; -} - extern bool hid_is_usb(const struct hid_device *hdev); #define PM_HINT_FULLON 1<<5 -- cgit v1.2.3 From 6dbe965a922f60471c4363f45c9e5d398310b811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:47 +0000 Subject: HID: Unexport struct usb_hid_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As no external users remain this implementation detail does not need to be exported anymore. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 4dec31e8eadb..5b10c65e2bf6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -858,7 +858,6 @@ struct hid_ll_driver { extern struct hid_ll_driver i2c_hid_ll_driver; extern struct hid_ll_driver hidp_hid_driver; extern struct hid_ll_driver uhid_hid_driver; -extern struct hid_ll_driver usb_hid_driver; extern bool hid_is_usb(const struct hid_device *hdev); -- cgit v1.2.3 From 8482aa2399e55b994e480a902ba86057f52887f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:48 +0000 Subject: HID: Unexport struct uhid_hid_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As there are no external users this implementation detail does not need to be exported. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5b10c65e2bf6..95ce9eec2157 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -857,7 +857,6 @@ struct hid_ll_driver { extern struct hid_ll_driver i2c_hid_ll_driver; extern struct hid_ll_driver hidp_hid_driver; -extern struct hid_ll_driver uhid_hid_driver; extern bool hid_is_usb(const struct hid_device *hdev); -- cgit v1.2.3 From 9e3c2efcae8e5bb220b3ebfde5e27d40b96314c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:49 +0000 Subject: HID: Unexport struct hidp_hid_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As there are no external users this implementation detail does not need to be exported. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 95ce9eec2157..c8a77b6c7985 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -856,7 +856,6 @@ struct hid_ll_driver { }; extern struct hid_ll_driver i2c_hid_ll_driver; -extern struct hid_ll_driver hidp_hid_driver; extern bool hid_is_usb(const struct hid_device *hdev); -- cgit v1.2.3 From ebb45d6bee4e65329cf1ff322ad1ca3013b3346d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:50 +0000 Subject: HID: Unexport struct i2c_hid_ll_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As there are no external users this implementation detail does not need to be exported. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index c8a77b6c7985..620ae99fb166 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -855,8 +855,6 @@ struct hid_ll_driver { bool (*may_wakeup)(struct hid_device *hdev); }; -extern struct hid_ll_driver i2c_hid_ll_driver; - extern bool hid_is_usb(const struct hid_device *hdev); #define PM_HINT_FULLON 1<<5 -- cgit v1.2.3 From 52d22534690446d1f4b45436b570f77df3e855ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 22 Dec 2022 05:10:51 +0000 Subject: HID: Make lowlevel driver structs const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing is nor should be modifying these structs so mark them as const. Signed-off-by: Thomas Weißschuh Reviewed-by: David Rheinsberg Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 620ae99fb166..3922b66c6da8 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -596,7 +596,7 @@ struct hid_device { /* device report descriptor */ struct device dev; /* device */ struct hid_driver *driver; - struct hid_ll_driver *ll_driver; + const struct hid_ll_driver *ll_driver; struct mutex ll_open_lock; unsigned int ll_open_count; -- cgit v1.2.3 From 96ec2939620c48a503d9c89865c0c230d6f955e4 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Thu, 5 Jan 2023 22:51:23 +0800 Subject: Drivers: hv: Make remove callback of hyperv driver void returned Since commit fc7a6209d571 ("bus: Make remove callback return void") forces bus_type::remove be void-returned, it doesn't make much sense for any bus based driver implementing remove callbalk to return non-void to its caller. As such, change the remove function for Hyper-V VMBus based drivers to return void. Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/TYCP286MB2323A93C55526E4DF239D3ACCAFA9@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Wei Liu --- include/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 85f7c5a63aa6..cd5cb9f6fae0 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1273,7 +1273,7 @@ struct hv_driver { } dynids; int (*probe)(struct hv_device *, const struct hv_vmbus_device_id *); - int (*remove)(struct hv_device *); + void (*remove)(struct hv_device *dev); void (*shutdown)(struct hv_device *); int (*suspend)(struct hv_device *); -- cgit v1.2.3 From ade7fd908d710d0ab865c273df782c75528636ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 Jan 2023 13:43:09 +0100 Subject: efi: efivars: drop kobject from efivars_register() Since commit 0f5b2c69a4cb ("efi: vars: Remove deprecated 'efivars' sysfs interface") and the removal of the sysfs interface there are no users of the efivars kobject. Drop the kobject argument from efivars_register() and add a new efivar_is_available() helper in favour of the old efivars_kobject(). Note that the new helper uses the prefix 'efivar' (i.e. without an 's') for consistency with efivar_supports_writes() and the rest of the interface (except the registration functions). For the benefit of drivers with optional EFI support, also provide a dummy implementation of efivar_is_available(). Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4b27519143f5..2124e55c02d6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1039,7 +1039,6 @@ struct efivar_operations { struct efivars { struct kset *kset; - struct kobject *kobject; const struct efivar_operations *ops; }; @@ -1053,10 +1052,14 @@ struct efivars { #define EFI_VAR_NAME_LEN 1024 int efivars_register(struct efivars *efivars, - const struct efivar_operations *ops, - struct kobject *kobject); + const struct efivar_operations *ops); int efivars_unregister(struct efivars *efivars); -struct kobject *efivars_kobject(void); + +#ifdef CONFIG_EFI +bool efivar_is_available(void); +#else +static inline bool efivar_is_available(void) { return false; } +#endif int efivar_supports_writes(void); -- cgit v1.2.3 From 2cf9e278efeff8f8bbb9580e2d6760e19795e310 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 Jan 2023 13:43:10 +0100 Subject: efi: efivars: make efivar_supports_writes() return bool For consistency with the new efivar_is_available() function, change the return type of efivar_supports_writes() to bool. Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2124e55c02d6..f6b107da1cbc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1061,7 +1061,7 @@ bool efivar_is_available(void); static inline bool efivar_is_available(void) { return false; } #endif -int efivar_supports_writes(void); +bool efivar_supports_writes(void); int efivar_lock(void); int efivar_trylock(void); -- cgit v1.2.3 From cd702d18c882d5a4ea44bbdb38edd5d5577ef640 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 16 Jan 2023 16:22:15 +0200 Subject: usb: acpi: add helper to check port lpm capability using acpi _DSM Add a helper to evaluate ACPI usb device specific method (_DSM) provided in case the USB3 port shouldn't enter U1 and U2 link states. This _DSM was added as port specific retimer configuration may lead to exit latencies growing beyond U1/U2 exit limits, and OS needs a way to find which ports can't support U1/U2 link power management states. This _DSM is also used by windows: Link: https://docs.microsoft.com/en-us/windows-hardware/drivers/bringup/usb-device-specific-method---dsm- Some patch issues found in testing resolved by Ron Lee Cc: stable@vger.kernel.org Tested-by: Ron Lee Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-7-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d5325d47c45..04a7e94fb772 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -774,11 +774,14 @@ extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); extern bool usb_acpi_power_manageable(struct usb_device *hdev, int index); +extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index); #else static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) { return 0; } static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index) { return true; } +static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) + { return 0; } #endif /* USB autosuspend and autoresume */ -- cgit v1.2.3 From 28e1ff70a08d331703f115534bd4278e11451439 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 21 Dec 2022 20:34:51 +0100 Subject: USB: Improve usb_fill_* documentation Document the transfer buffer requirement. That is, the buffer must be DMAble - otherwise data corruption might occur. Acked-by: Randy Dunlap Reviewed-by: Laurent Pinchart Signed-off-by: Ricardo Ribalda Acked-by: Alan Stern Link: https://lore.kernel.org/r/20221220-usb-dmadoc-v4-0-74a045bf14f4@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d5325d47c45..4e98ebacec96 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1626,14 +1626,25 @@ struct urb { * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe - * @setup_packet: pointer to the setup_packet buffer - * @transfer_buffer: pointer to the transfer buffer + * @setup_packet: pointer to the setup_packet buffer. The buffer must be + * suitable for DMA. + * @transfer_buffer: pointer to the transfer buffer. The buffer must be + * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a control urb with the proper information needed to submit * it to a device. + * + * The transfer buffer and the setup_packet buffer will most likely be filled + * or read via DMA. The simplest way to get a buffer that can be DMAed to is + * allocating it via kmalloc() or equivalent, even for very small buffers. + * If the buffers are embedded in a bigger structure, there is a risk that + * the buffer itself, the previous fields and/or the next fields are corrupted + * due to cache incoherencies; or slowed down if they are evicted from the + * cache. For more information, check &struct urb. + * */ static inline void usb_fill_control_urb(struct urb *urb, struct usb_device *dev, @@ -1658,13 +1669,17 @@ static inline void usb_fill_control_urb(struct urb *urb, * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe - * @transfer_buffer: pointer to the transfer buffer + * @transfer_buffer: pointer to the transfer buffer. The buffer must be + * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a bulk urb with the proper information needed to submit it * to a device. + * + * Refer to usb_fill_control_urb() for a description of the requirements for + * transfer_buffer. */ static inline void usb_fill_bulk_urb(struct urb *urb, struct usb_device *dev, @@ -1687,7 +1702,8 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe - * @transfer_buffer: pointer to the transfer buffer + * @transfer_buffer: pointer to the transfer buffer. The buffer must be + * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. @@ -1697,6 +1713,9 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * Initializes a interrupt urb with the proper information needed to submit * it to a device. * + * Refer to usb_fill_control_urb() for a description of the requirements for + * transfer_buffer. + * * Note that High Speed and SuperSpeed(+) interrupt endpoints use a logarithmic * encoding of the endpoint interval, and express polling intervals in * microframes (eight per millisecond) rather than in frames (one per -- cgit v1.2.3 From 599f008c257d913674b2b2f2fa9e273c1058ec2e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sat, 14 Jan 2023 01:32:44 -0800 Subject: usb: typec: tcpm: Add callbacks to mitigate wakeups due to contaminant On some of the TCPC implementations, when the Type-C port is exposed to contaminants, such as water, TCPC stops toggling while reporting OPEN either by the time TCPM reads CC pin status or during CC debounce window. This causes TCPM to be stuck in TOGGLING state. If TCPM is made to restart toggling, the behavior recurs causing redundant CPU wakeups till the USB-C port is free of contaminant. [206199.287817] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [206199.640337] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [206199.985789] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] (or) [ 7853.867577] Start toggling [ 7853.889921] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7855.698765] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 7855.698790] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7855.698826] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 7855.703559] CC1: 0 -> 0, CC2: 5 -> 5 [state SNK_ATTACH_WAIT, polarity 0, connected] [ 7855.856555] CC1: 0 -> 0, CC2: 5 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 7855.856581] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7855.856613] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @ 170 ms [rev3 NONE_AMS] [ 7856.027744] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed 170 ms] [ 7856.181949] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7856.187896] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7857.645630] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7857.647291] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 7857.647298] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7857.647310] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 7857.808106] CC1: 0 -> 0, CC2: 5 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 7857.808123] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7857.808150] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @ 170 ms [rev3 NONE_AMS] [ 7857.978727] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed 170 ms] To mitigate redundant TCPM wakeups, TCPCs which do have the needed hardware can implement the check_contaminant callback which is invoked by TCPM to evaluate for presence of contaminant. Lower level TCPC driver can restart toggling through TCPM_PORT_CLEAN event when the driver detects that USB-C port is free of contaminant. check_contaminant callback also passes the disconnect_while_debounce flag which when true denotes that the CC pins transitioned to OPEN state during the CC debounce window. Signed-off-by: Badhri Jagan Sridharan Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230114093246.1933321-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index bffc8d3e14ad..ab7ca872950b 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -114,6 +114,11 @@ enum tcpm_transmit_type { * Optional; The USB Communications Capable bit indicates if port * partner is capable of communication over the USB data lines * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit. + * @check_contaminant: + * Optional; The callback is called when CC pins report open status + * at the end of the deboumce period or when the port is still + * toggling. Chip level drivers are expected to check for contaminant + * and call tcpm_clean_port when the port is clean. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -148,6 +153,7 @@ struct tcpc_dev { bool pps_active, u32 requested_vbus_voltage); bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); void (*set_partner_usb_comm_capable)(struct tcpc_dev *dev, bool enable); + void (*check_contaminant)(struct tcpc_dev *dev); }; struct tcpm_port; @@ -165,5 +171,7 @@ void tcpm_pd_transmit_complete(struct tcpm_port *port, enum tcpm_transmit_status status); void tcpm_pd_hard_reset(struct tcpm_port *port); void tcpm_tcpc_reset(struct tcpm_port *port); +void tcpm_port_clean(struct tcpm_port *port); +bool tcpm_port_is_toggling(struct tcpm_port *port); #endif /* __LINUX_USB_TCPM_H */ -- cgit v1.2.3 From abc028a270f47f86060ef479395d1bb8c2ab6e7e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sat, 14 Jan 2023 01:32:45 -0800 Subject: usb: typec: tcpci: Add callback for evaluating contaminant presence This change adds callback to evaluate presence of contaminant in the TCPCI layer. Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20230114093246.1933321-2-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 17657451c762..85e95a3251d3 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -188,6 +188,12 @@ struct tcpci; * Optional; The USB Communications Capable bit indicates if port * partner is capable of communication over the USB data lines * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit. + * @check_contaminant: + * Optional; The callback is invoked when chiplevel drivers indicated + * that the USB port needs to be checked for contaminant presence. + * Chip level drivers are expected to check for contaminant and call + * tcpm_clean_port when the port is clean to put the port back into + * toggling state. */ struct tcpci_data { struct regmap *regmap; @@ -204,6 +210,7 @@ struct tcpci_data { void (*frs_sourcing_vbus)(struct tcpci *tcpci, struct tcpci_data *data); void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data, bool capable); + void (*check_contaminant)(struct tcpci *tcpci, struct tcpci_data *data); }; struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data); -- cgit v1.2.3 From 11cefeb2058f9f2322713b2683be9301556705e2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 12 Dec 2022 16:20:35 +0100 Subject: USB: fix misleading usb_set_intfdata() kernel doc The struct device driver-data pointer is used for any data that a driver may need in various callbacks while bound to the device. For convenience, subsystems typically provide wrappers such as usb_set_intfdata() of the generic accessor functions for use in bus callbacks. There is generally no longer any need for a driver to clear the pointer, but since commit 0998d0631001 ("device-core: Ensure drvdata = NULL when no driver is bound") the driver-data pointer is set to NULL by driver core post unbind anyway. For historical reasons, USB core also clears this pointer when an explicitly claimed interface is released. Due to a misunderstanding, a misleading kernel doc comment for usb_set_intfdata() was recently added which claimed that the driver data pointer must not be cleared during disconnect before "all actions [are] completed", which is both imprecise and incorrect. Specifically, drivers like cdc-acm which claim additional interfaces use the driver-data pointer as a flag which is cleared when the first interface is unbound. As long as a driver does not do something odd like dereference the pointer in, for example, completion callbacks, this can be done at any time during disconnect. And in any case this is no different than for any other resource, like the driver data itself, which may be freed by the disconnect callback. Note that the comment actually also claimed that the interface itself was somehow being set to NULL by driver core. Fix the kernel doc by removing incorrect, overly specific and misleading details and adding a comment about why some drivers do clear the driver-data pointer. Fixes: 27ef17849779 ("usb: add usb_set_intfdata() documentation") Signed-off-by: Johan Hovold Acked-by: Alan Stern Acked-by: Vincent Mailhol Link: https://lore.kernel.org/r/20221212152035.31806-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 04a7e94fb772..86d1c8e79566 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -267,16 +267,15 @@ static inline void *usb_get_intfdata(struct usb_interface *intf) } /** - * usb_set_intfdata() - associate driver-specific data with the interface - * @intf: the usb interface - * @data: pointer to the device priv structure or %NULL + * usb_set_intfdata() - associate driver-specific data with an interface + * @intf: USB interface + * @data: driver data * - * Drivers should use this function in their probe() to associate their - * driver-specific data with the usb interface. + * Drivers can use this function in their probe() callbacks to associate + * driver-specific data with an interface. * - * When disconnecting, the core will take care of setting @intf back to %NULL, - * so no actions are needed on the driver side. The interface should not be set - * to %NULL before all actions completed (e.g. no outsanding URB remaining). + * Note that there is generally no need to clear the driver-data pointer even + * if some drivers do so for historical or implementation-specific reasons. */ static inline void usb_set_intfdata(struct usb_interface *intf, void *data) { -- cgit v1.2.3 From f7b3ea8cf72f3d6060fe08e461805181e7450a13 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 Dec 2022 10:29:04 +0800 Subject: genirq/affinity: Move group_cpus_evenly() into lib/ group_cpus_evenly() has become a generic function which can be used for other subsystems than the interrupt subsystem, so move it into lib/. Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Link: https://lore.kernel.org/r/20221227022905.352674-6-ming.lei@redhat.com --- include/linux/group_cpus.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/group_cpus.h (limited to 'include/linux') diff --git a/include/linux/group_cpus.h b/include/linux/group_cpus.h new file mode 100644 index 000000000000..e42807ec61f6 --- /dev/null +++ b/include/linux/group_cpus.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2016 Thomas Gleixner. + * Copyright (C) 2016-2017 Christoph Hellwig. + */ + +#ifndef __LINUX_GROUP_CPUS_H +#define __LINUX_GROUP_CPUS_H +#include +#include + +struct cpumask *group_cpus_evenly(unsigned int numgrps); + +#endif -- cgit v1.2.3 From 5c5a7680e67ba6fbbb5f4d79fa41485450c1985c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 9 Dec 2022 16:09:14 +0100 Subject: platform: Provide a remove callback that returns no value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct platform_driver::remove returning an integer made driver authors expect that returning an error code was proper error handling. However the driver core ignores the error and continues to remove the device because there is nothing the core could do anyhow and reentering the remove callback again is only calling for trouble. So this is an source for errors typically yielding resource leaks in the error path. As there are too many platform drivers to neatly convert them all to return void in a single go, do it in several steps after this patch: a) Convert all drivers to implement .remove_new() returning void instead of .remove() returning int; b) Change struct platform_driver::remove() to return void and so make it identical to .remove_new(); c) Change all drivers back to .remove() now with the better prototype; d) drop struct platform_driver::remove_new(). While this touches all drivers eventually twice, steps a) and c) can be done one driver after another and so reduces coordination efforts immensely and simplifies review. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221209150914.3557650-1-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index b0d5a253156e..b845fd83f429 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -207,7 +207,18 @@ extern void platform_device_put(struct platform_device *pdev); struct platform_driver { int (*probe)(struct platform_device *); + + /* + * Traditionally the remove callback returned an int which however is + * ignored by the driver core. This led to wrong expectations by driver + * authors who thought returning an error code was a valid error + * handling strategy. To convert to a callback returning void, new + * drivers should implement .remove_new() until the conversion it done + * that eventually makes .remove() return void. + */ int (*remove)(struct platform_device *); + void (*remove_new)(struct platform_device *); + void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); int (*resume)(struct platform_device *); -- cgit v1.2.3 From 3dbdd92014a410778de8406e9e3253e353f51c2b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Dec 2022 11:49:22 +0200 Subject: software node: Remove unused APIs There are no more users of software_node_register_nodes() and software_node_unregister_nodes(). Remove them. Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Acked-by: Sakari Ailus Tested-by: Daniel Scally Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221228094922.84119-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 37179e3abad5..1ffd4f9bb67b 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -486,9 +486,6 @@ const struct software_node * software_node_find_by_name(const struct software_node *parent, const char *name); -int software_node_register_nodes(const struct software_node *nodes); -void software_node_unregister_nodes(const struct software_node *nodes); - int software_node_register_node_group(const struct software_node **node_group); void software_node_unregister_node_group(const struct software_node **node_group); -- cgit v1.2.3 From c3719bd9eeb2edf84bd263d662e36ca0ba262a23 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 4 Jan 2023 19:30:24 +0100 Subject: cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation RISC-V's implementation of init_of_cache_level() is following the Devicetree Specification v0.3 regarding caches, cf.: - s3.7.3 'Internal (L1) Cache Properties' - s3.8 'Multi-level and Shared Cache Nodes' Allow reusing the implementation by moving it. Also make 'levels', 'leaves' and 'level' unsigned int. Signed-off-by: Pierre Gondois Reviewed-by: Conor Dooley Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230104183033.755668-2-pierre.gondois@arm.com Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 00b7a6ae8617..ff0328f3fbb0 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -80,6 +80,7 @@ struct cpu_cacheinfo { struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); +int init_of_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); -- cgit v1.2.3 From 504450d05c545a839b22542c72ddc388740462ac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 10:23:30 +0100 Subject: driver core: bus.h: document bus notifiers better The bus notifier values are not documented all that well, so clean this up and make a real enumerated type for them and document them much better. When doing this, remove the hex values and just rely on the enumerated type instead as that is all that is needed. Reviewed-by: Randy Dunlap Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230111092331.3946745-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index d529f644e92b..7b4a48b5159b 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -257,21 +257,35 @@ extern int bus_register_notifier(struct bus_type *bus, extern int bus_unregister_notifier(struct bus_type *bus, struct notifier_block *nb); -/* All 4 notifers below get called with the target struct device * - * as an argument. Note that those functions are likely to be called - * with the device lock held in the core, so be careful. +/** + * enum bus_notifier_event - Bus Notifier events that have happened + * @BUS_NOTIFY_ADD_DEVICE: device is added to this bus + * @BUS_NOTIFY_DEL_DEVICE: device is about to be removed from this bus + * @BUS_NOTIFY_REMOVED_DEVICE: device is successfully removed from this bus + * @BUS_NOTIFY_BIND_DRIVER: a driver is about to be bound to this device on this bus + * @BUS_NOTIFY_BOUND_DRIVER: a driver is successfully bound to this device on this bus + * @BUS_NOTIFY_UNBIND_DRIVER: a driver is about to be unbound from this device on this bus + * @BUS_NOTIFY_UNBOUND_DRIVER: a driver is successfully unbound from this device on this bus + * @BUS_NOTIFY_DRIVER_NOT_BOUND: a driver failed to be bound to this device on this bus + * + * These are the value passed to a bus notifier when a specific event happens. + * + * Note that bus notifiers are likely to be called with the device lock already + * held by the driver core, so be careful in any notifier callback as to what + * you do with the device structure. + * + * All bus notifiers are called with the target struct device * as an argument. */ -#define BUS_NOTIFY_ADD_DEVICE 0x00000001 /* device added */ -#define BUS_NOTIFY_DEL_DEVICE 0x00000002 /* device to be removed */ -#define BUS_NOTIFY_REMOVED_DEVICE 0x00000003 /* device removed */ -#define BUS_NOTIFY_BIND_DRIVER 0x00000004 /* driver about to be - bound */ -#define BUS_NOTIFY_BOUND_DRIVER 0x00000005 /* driver bound to device */ -#define BUS_NOTIFY_UNBIND_DRIVER 0x00000006 /* driver about to be - unbound */ -#define BUS_NOTIFY_UNBOUND_DRIVER 0x00000007 /* driver is unbound - from the device */ -#define BUS_NOTIFY_DRIVER_NOT_BOUND 0x00000008 /* driver fails to be bound */ +enum bus_notifier_event { + BUS_NOTIFY_ADD_DEVICE, + BUS_NOTIFY_DEL_DEVICE, + BUS_NOTIFY_REMOVED_DEVICE, + BUS_NOTIFY_BIND_DRIVER, + BUS_NOTIFY_BOUND_DRIVER, + BUS_NOTIFY_UNBIND_DRIVER, + BUS_NOTIFY_UNBOUND_DRIVER, + BUS_NOTIFY_DRIVER_NOT_BOUND, +}; extern struct kset *bus_get_kset(struct bus_type *bus); -- cgit v1.2.3 From bd500361a937c03a3da57178287ce543c8f3681b Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 4 Jan 2023 19:30:28 +0100 Subject: ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info() acpi_find_last_cache_level() allows to find the last level of cache for a given CPU. The function is only called on arm64 ACPI based platforms to check for cache information that would be missing in the CLIDR_EL1 register. To allow populating (struct cpu_cacheinfo).num_leaves by only parsing a PPTT, update acpi_find_last_cache_level() to get the 'split_levels', i.e. the number of cache levels being split in data/instruction caches. It is assumed that there will not be data/instruction caches above a unified cache. If a split level consist of one data cache and no instruction cache (or opposite), then the missing cache will still be populated by default with minimal cache information, and maximal cpumask (all non-existing caches have the same fw_token). Suggested-by: Jeremy Linton Signed-off-by: Pierre Gondois Reviewed-by: Jeremy Linton Acked-by: Rafael J. Wysocki Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230104183033.755668-6-pierre.gondois@arm.com Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index ff0328f3fbb0..00d8e7f9d1c6 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -88,19 +88,22 @@ bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* - * acpi_find_last_cache_level is only called on ACPI enabled + * acpi_get_cache_info() is only called on ACPI enabled * platforms using the PPTT for topology. This means that if * the platform supports other firmware configuration methods * we need to stub out the call when ACPI is disabled. * ACPI enabled platforms not using PPTT won't be making calls * to this function so we need not worry about them. */ -static inline int acpi_find_last_cache_level(unsigned int cpu) +static inline +int acpi_get_cache_info(unsigned int cpu, + unsigned int *levels, unsigned int *split_levels) { return 0; } #else -int acpi_find_last_cache_level(unsigned int cpu); +int acpi_get_cache_info(unsigned int cpu, + unsigned int *levels, unsigned int *split_levels); #endif const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); -- cgit v1.2.3 From 5944ce092b97caed5d86d961e963b883b5c44ee2 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 4 Jan 2023 19:30:29 +0100 Subject: arch_topology: Build cacheinfo from primary CPU commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the CPU hotplug path") adds a call to detect_cache_attributes() to populate the cacheinfo before updating the siblings mask. detect_cache_attributes() allocates memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT kernels, on secondary CPUs, this triggers a: 'BUG: sleeping function called from invalid context' [1] as the code is executed with preemption and interrupts disabled. The primary CPU was previously storing the cache information using the now removed (struct cpu_topology).llc_id: commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from the CPU topology") allocate_cache_info() tries to build the cacheinfo from the primary CPU prior secondary CPUs boot, if the DT/ACPI description contains cache information. If allocate_cache_info() fails, then fallback to the current state for the cacheinfo allocation. [1] will be triggered in such case. When unplugging a CPU, the cacheinfo memory cannot be freed. If it was, then the memory would be allocated early by the re-plugged CPU and would trigger [1]. Note that populate_cache_leaves() might be called multiple times due to populate_leaves being moved up. This is required since detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu) being allocated but not populated. [1]: | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111 | preempt_count: 1, expected: 0 | RCU nest depth: 1, expected: 1 | 3 locks held by swapper/111/0: | #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8 | #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0 | #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80 | irq event stamp: 0 | hardirqs last enabled at (0): 0x0 | hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8 | softirqs last enabled at (0): copy_process+0x5dc/0x1ab8 | softirqs last disabled at (0): 0x0 | Preemption disabled at: | migrate_enable+0x30/0x130 | CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...] | Call trace: | __kmalloc+0xbc/0x1e8 | detect_cache_attributes+0x2d4/0x5f0 | update_siblings_masks+0x30/0x368 | store_cpu_topology+0x78/0xb8 | secondary_start_kernel+0xd0/0x198 | __secondary_switched+0xb0/0xb4 Signed-off-by: Pierre Gondois Reviewed-by: Sudeep Holla Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 00d8e7f9d1c6..dfef57077cd0 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -85,6 +85,7 @@ int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); +int fetch_cache_info(unsigned int cpu); int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* -- cgit v1.2.3 From 4cf7a136115e96241f9f1089d2b53c47accf3823 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:52 -0800 Subject: perf/core: Save the dynamic parts of sample data size The perf sample data can be divided into parts. The event->header_size and event->id_header_size keep the static part of the sample data which is determined by the sample_type flags. But other parts like CALLCHAIN and BRANCH_STACK are changing dynamically so it needs to see the actual data. In preparation of handling repeated calls for perf_prepare_sample(), it can save the dynamic size to the perf sample data to avoid the duplicate work. Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-2-namhyung@kernel.org --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 03949d017ac9..16b980014449 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1103,6 +1103,7 @@ struct perf_sample_data { */ u64 sample_flags; u64 period; + u64 dyn_size; /* * Fields commonly set by __perf_event_header__init_id(), @@ -1158,6 +1159,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, /* remaining struct members initialized in perf_prepare_sample() */ data->sample_flags = PERF_SAMPLE_PERIOD; data->period = period; + data->dyn_size = 0; if (addr) { data->addr = addr; -- cgit v1.2.3 From 31046500c1864b8ab25d1b9846ad10aa3f7b1821 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:53 -0800 Subject: perf/core: Add perf_sample_save_callchain() helper When we save the callchain to the perf sample data, we need to update the sample flags and the dynamic size. To ensure this is done consistently, add the perf_sample_save_callchain() helper and convert all call sites. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-3-namhyung@kernel.org --- include/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 16b980014449..a9419608402b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1095,6 +1095,8 @@ int perf_event_read_local(struct perf_event *event, u64 *value, extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); +extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); + struct perf_sample_data { /* @@ -1167,6 +1169,19 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, } } +static inline void perf_sample_save_callchain(struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs) +{ + int size = 1; + + data->callchain = perf_callchain(event, regs); + size += data->callchain->nr; + + data->dyn_size += size * sizeof(u64); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; +} + /* * Clear all bitfields in the perf_branch_entry. * The to and from fields are not cleared because they are @@ -1408,7 +1423,6 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); -extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); -- cgit v1.2.3 From 0a9081cf0a11770f6b0affd377db8caa3ec4c793 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:54 -0800 Subject: perf/core: Add perf_sample_save_raw_data() helper When we save the raw_data to the perf sample data, we need to update the sample flags and the dynamic size. To make sure this is done consistently, add the perf_sample_save_raw_data() helper and convert all call sites. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-4-namhyung@kernel.org --- include/linux/perf_event.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9419608402b..569dfac5887f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -95,6 +95,11 @@ struct perf_raw_record { u32 size; }; +static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) +{ + return frag->pad < sizeof(u64); +} + /* * branch stack layout: * nr: number of taken branches stored in entries[] @@ -1182,6 +1187,29 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_CALLCHAIN; } +static inline void perf_sample_save_raw_data(struct perf_sample_data *data, + struct perf_raw_record *raw) +{ + struct perf_raw_frag *frag = &raw->frag; + u32 sum = 0; + int size; + + do { + sum += frag->size; + if (perf_raw_frag_last(frag)) + break; + frag = frag->next; + } while (1); + + size = round_up(sum + sizeof(u32), sizeof(u64)); + raw->size = size - sizeof(u32); + frag->pad = raw->size - sum; + + data->raw = raw; + data->dyn_size += size; + data->sample_flags |= PERF_SAMPLE_RAW; +} + /* * Clear all bitfields in the perf_branch_entry. * The to and from fields are not cleared because they are @@ -1690,11 +1718,6 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif -static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) -{ - return frag->pad < sizeof(u64); -} - #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) struct perf_pmu_events_attr { -- cgit v1.2.3 From eb55b455ef9c7123bdfa7e8a7f1ebeaa8034eb83 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:55 -0800 Subject: perf/core: Add perf_sample_save_brstack() helper When we saves the branch stack to the perf sample data, we needs to update the sample flags and the dynamic size. To make sure this is done consistently, add the perf_sample_save_brstack() helper and convert all call sites. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Athira Rajeev Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-5-namhyung@kernel.org --- include/linux/perf_event.h | 66 ++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 569dfac5887f..7db0e9cc2682 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1102,6 +1102,31 @@ extern u64 perf_event_read_value(struct perf_event *event, extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); +static inline bool branch_sample_no_flags(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; +} + +static inline bool branch_sample_no_cycles(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; +} + +static inline bool branch_sample_type(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; +} + +static inline bool branch_sample_hw_index(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + +static inline bool branch_sample_priv(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; +} + struct perf_sample_data { /* @@ -1210,6 +1235,21 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_RAW; } +static inline void perf_sample_save_brstack(struct perf_sample_data *data, + struct perf_event *event, + struct perf_branch_stack *brs) +{ + int size = sizeof(u64); /* nr */ + + if (branch_sample_hw_index(event)) + size += sizeof(u64); + size += brs->nr * sizeof(struct perf_branch_entry); + + data->br_stack = brs; + data->dyn_size += size; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; +} + /* * Clear all bitfields in the perf_branch_entry. * The to and from fields are not cleared because they are @@ -1827,30 +1867,4 @@ static inline void perf_lopwr_cb(bool mode) } #endif -#ifdef CONFIG_PERF_EVENTS -static inline bool branch_sample_no_flags(const struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; -} - -static inline bool branch_sample_no_cycles(const struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; -} - -static inline bool branch_sample_type(const struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; -} - -static inline bool branch_sample_hw_index(const struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; -} - -static inline bool branch_sample_priv(const struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; -} -#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_PERF_EVENT_H */ -- cgit v1.2.3 From f6e707156e1d5d150f288823987bee1ba0104c4c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:58 -0800 Subject: perf/core: Introduce perf_prepare_header() Factor out perf_prepare_header() so that it can call perf_prepare_sample() without a header if not needed. Also it checks the filtered_sample_type to avoid duplicate work when perf_prepare_sample() is called twice (or more). Suggested-by: Peter Zijlstr Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-8-namhyung@kernel.org --- include/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7db0e9cc2682..d5628a7b5eaa 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1250,6 +1250,17 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } +static inline u32 perf_sample_data_size(struct perf_sample_data *data, + struct perf_event *event) +{ + u32 size = sizeof(struct perf_event_header); + + size += event->header_size + event->id_header_size; + size += data->dyn_size; + + return size; +} + /* * Clear all bitfields in the perf_branch_entry. * The to and from fields are not cleared because they are @@ -1271,7 +1282,10 @@ extern void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event); -extern void perf_prepare_sample(struct perf_event_header *header, +extern void perf_prepare_sample(struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs); +extern void perf_prepare_header(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs); -- cgit v1.2.3 From 19235e47279894b033a3ec5cf2732de634862b3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Jan 2023 11:26:29 +0100 Subject: cpuidle, arm64: Fix the ARM64 cpuidle logic The recent cpuidle changes started triggering RCU splats on Juno development boards: | ============================= | WARNING: suspicious RCU usage | ----------------------------- | include/trace/events/ipi.h:19 suspicious rcu_dereference_check() usage! Fix cpuidle on ARM64: - ... by introducing a new 'is_rcu' flag to the cpuidle helpers & make ARM64 use it, as ARM64 wants to keep RCU active longer and wants to do the ct_cpuidle_enter()/exit() dance itself. - Also update the PSCI driver accordingly. - This also removes the last known RCU_NONIDLE() user as a bonus. Reported-by: Mark Rutland Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Tested-by: Sudeep Holla Tested-by: Mark Rutland Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/Y8Z31UbzG3LJgAXE@hirez.programming.kicks-ass.net -- --- include/linux/cpuidle.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 630c879143c7..3183aeb7f5b4 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -307,7 +307,7 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); #define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, \ idx, \ state, \ - is_retention) \ + is_retention, is_rcu) \ ({ \ int __ret = 0; \ \ @@ -319,9 +319,11 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); if (!is_retention) \ __ret = cpu_pm_enter(); \ if (!__ret) { \ - ct_cpuidle_enter(); \ + if (!is_rcu) \ + ct_cpuidle_enter(); \ __ret = low_level_idle_enter(state); \ - ct_cpuidle_exit(); \ + if (!is_rcu) \ + ct_cpuidle_exit(); \ if (!is_retention) \ cpu_pm_exit(); \ } \ @@ -330,15 +332,21 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); }) #define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0, 0) #define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1, 0) #define CPU_PM_CPU_IDLE_ENTER_PARAM(low_level_idle_enter, idx, state) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0, 0) + +#define CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0, 1) #define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(low_level_idle_enter, idx, state) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1, 0) + +#define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1, 1) #endif /* _LINUX_CPUIDLE_H */ -- cgit v1.2.3 From abf08576afe31506b812c8c1be9714f78613f300 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:10 +0100 Subject: fs: port vfs_*() helpers to struct mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 066555ad1bf8..7aa302d2ce39 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1944,36 +1944,36 @@ bool inode_owner_or_capable(struct user_namespace *mnt_userns, /* * VFS helper functions.. */ -int vfs_create(struct user_namespace *, struct inode *, +int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int vfs_mkdir(struct user_namespace *, struct inode *, +int vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); -int vfs_mknod(struct user_namespace *, struct inode *, struct dentry *, +int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -int vfs_symlink(struct user_namespace *, struct inode *, +int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); -int vfs_link(struct dentry *, struct user_namespace *, struct inode *, +int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); -int vfs_rmdir(struct user_namespace *, struct inode *, struct dentry *); -int vfs_unlink(struct user_namespace *, struct inode *, struct dentry *, +int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); +int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); /** * struct renamedata - contains all information required for renaming - * @old_mnt_userns: old user namespace of the mount the inode was found from + * @old_mnt_idmap: idmap of the old mount the inode was found from * @old_dir: parent of source * @old_dentry: source - * @new_mnt_userns: new user namespace of the mount the inode was found from + * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_dir: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { - struct user_namespace *old_mnt_userns; + struct mnt_idmap *old_mnt_idmap; struct inode *old_dir; struct dentry *old_dentry; - struct user_namespace *new_mnt_userns; + struct mnt_idmap *new_mnt_idmap; struct inode *new_dir; struct dentry *new_dentry; struct inode **delegated_inode; @@ -1982,14 +1982,14 @@ struct renamedata { int vfs_rename(struct renamedata *); -static inline int vfs_whiteout(struct user_namespace *mnt_userns, +static inline int vfs_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { - return vfs_mknod(mnt_userns, dir, dentry, S_IFCHR | WHITEOUT_MODE, + return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); } -struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns, +struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred); @@ -2746,7 +2746,7 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) } extern long vfs_truncate(const struct path *, loff_t); -int do_truncate(struct user_namespace *, struct dentry *, loff_t start, +int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); @@ -2901,7 +2901,7 @@ static inline int bmap(struct inode *inode, sector_t *block) } #endif -int notify_change(struct user_namespace *, struct dentry *, +int notify_change(struct mnt_idmap *, struct dentry *, struct iattr *, struct inode **); int inode_permission(struct user_namespace *, struct inode *, int); int generic_permission(struct user_namespace *, struct inode *, int); -- cgit v1.2.3 From d3c8a33a5cadafb94355fb087fe87522b295189e Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 11 Oct 2022 17:28:10 -0700 Subject: net/mlx5: Add hardware extended range support for PTP adjtime and adjphase Capable hardware can use an extended range for offsetting the clock. An extended range of [-200000,200000] is used instead of [-32768,32767] for the delta/phase parameter of the adjtime/adjphase ptp_clock_info callbacks. Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a84bdeeed2c6..0b102c651fe2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9941,7 +9941,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x5d]; + u8 reserved_at_0[0x51]; + u8 mtutc_time_adjustment_extended_range[0x1]; + u8 reserved_at_52[0xb]; u8 mcia_32dwords[0x1]; u8 out_pulse_duration_ns[0x1]; u8 npps_period[0x1]; -- cgit v1.2.3 From 80baab88bb93eeaa133b426d24dfc0775a8cf824 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:45:51 -0800 Subject: iomap/gfs2: Unlock and put folio in page_done handler When an iomap defines a ->page_done() handler in its page_ops, delegate unlocking the folio and putting the folio reference to that handler. This allows to fix a race between journaled data writes and folio writeback in gfs2: before this change, gfs2_iomap_page_done() was called after unlocking the folio, so writeback could start writing back the folio's buffers before they could be marked for writing to the journal. Also, try_to_free_buffers() could free the buffers before gfs2_iomap_page_done() was done adding the buffers to the current current transaction. With this change, gfs2_iomap_page_done() adds the buffers to the current transaction while the folio is still locked, so the problems described above can no longer occur. The only current user of ->page_done() is gfs2, so other filesystems are not affected. To catch out any out-of-tree users, switch from a page to a folio in ->page_done(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0983dfc9a203..743e2a909162 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -131,13 +131,14 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * associated with them. * * When page_prepare succeeds, page_done will always be called to do any - * cleanup work necessary. In that page_done call, @page will be NULL if the - * associated page could not be obtained. + * cleanup work necessary. In that page_done call, @folio will be NULL if the + * associated folio could not be obtained. When folio is not NULL, page_done + * is responsible for unlocking and putting the folio. */ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, - struct page *page); + struct folio *folio); /* * Check that the cached iomap still maps correctly to the filesystem's -- cgit v1.2.3 From 40405dddd98a9a95585482af46b8e269f5ebe5df Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:45:51 -0800 Subject: iomap: Rename page_done handler to put_folio The ->page_done() handler in struct iomap_page_ops is now somewhat misnamed in that it mainly deals with unlocking and putting a folio, so rename it to ->put_folio(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 743e2a909162..ecf815b34d51 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -126,18 +126,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) /* * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare - * and page_done will be called for each page written to. This only applies to - * buffered writes as unbuffered writes will not typically have pages + * and put_folio will be called for each folio written to. This only applies + * to buffered writes as unbuffered writes will not typically have folios * associated with them. * - * When page_prepare succeeds, page_done will always be called to do any - * cleanup work necessary. In that page_done call, @folio will be NULL if the - * associated folio could not be obtained. When folio is not NULL, page_done + * When page_prepare succeeds, put_folio will always be called to do any + * cleanup work necessary. In that put_folio call, @folio will be NULL if the + * associated folio could not be obtained. When folio is not NULL, put_folio * is responsible for unlocking and putting the folio. */ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); - void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, + void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); /* -- cgit v1.2.3 From 98321b5139f92a736a426404fb6e23bfb8feb9cc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:49:04 -0800 Subject: iomap: Add iomap_get_folio helper Add an iomap_get_folio() helper that gets a folio reference based on an iomap iterator and an offset into the address space. Use it in iomap_write_begin(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index ecf815b34d51..188d14e786a4 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -261,6 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, -- cgit v1.2.3 From 9060bc4d3aca6106bbe72891efba391d9d6b86e7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:49:12 -0800 Subject: iomap/gfs2: Get page in page_prepare handler Change the iomap ->page_prepare() handler to get and return a locked folio instead of doing that in iomap_write_begin(). This allows to recover from out-of-memory situations in ->page_prepare(), which eliminates the corresponding error handling code in iomap_write_begin(). The ->put_folio() handler now also isn't called with NULL as the folio value anymore. Filesystems are expected to use the iomap_get_folio() helper for getting locked folios in their ->page_prepare() handlers. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 188d14e786a4..d50501781856 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -13,6 +13,7 @@ struct address_space; struct fiemap_extent_info; struct inode; +struct iomap_iter; struct iomap_dio; struct iomap_writepage_ctx; struct iov_iter; @@ -131,12 +132,12 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * associated with them. * * When page_prepare succeeds, put_folio will always be called to do any - * cleanup work necessary. In that put_folio call, @folio will be NULL if the - * associated folio could not be obtained. When folio is not NULL, put_folio - * is responsible for unlocking and putting the folio. + * cleanup work necessary. put_folio is responsible for unlocking and putting + * @folio. */ struct iomap_page_ops { - int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); + struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos, + unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); -- cgit v1.2.3 From c82abc2394642490da736b1ba78671bbcef81150 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:50:25 -0800 Subject: iomap: Rename page_prepare handler to get_folio The ->page_prepare() handler in struct iomap_page_ops is now somewhat misnamed, so rename it to ->get_folio(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index d50501781856..da226032aedc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -126,17 +126,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) } /* - * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare + * When a filesystem sets page_ops in an iomap mapping it returns, get_folio * and put_folio will be called for each folio written to. This only applies * to buffered writes as unbuffered writes will not typically have folios * associated with them. * - * When page_prepare succeeds, put_folio will always be called to do any + * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. */ struct iomap_page_ops { - struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos, + struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); -- cgit v1.2.3 From 471859f57d42537626a56312cfb50cd6acee09ae Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:50:44 -0800 Subject: iomap: Rename page_ops to folio_ops The operations in struct page_ops all operate on folios, so rename struct page_ops to struct folio_ops. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig [djwong: port around not removing iomap_valid] Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index da226032aedc..c70a9d4fb447 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -86,7 +86,7 @@ struct vm_fault; */ #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ -struct iomap_page_ops; +struct iomap_folio_ops; struct iomap { u64 addr; /* disk offset of mapping, bytes */ @@ -98,7 +98,7 @@ struct iomap { struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; void *private; /* filesystem private */ - const struct iomap_page_ops *page_ops; + const struct iomap_folio_ops *folio_ops; u64 validity_cookie; /* used with .iomap_valid() */ }; @@ -126,7 +126,7 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) } /* - * When a filesystem sets page_ops in an iomap mapping it returns, get_folio + * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio * and put_folio will be called for each folio written to. This only applies * to buffered writes as unbuffered writes will not typically have folios * associated with them. @@ -135,7 +135,7 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. */ -struct iomap_page_ops { +struct iomap_folio_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, -- cgit v1.2.3 From 4b9a3f49f02bf682eedfde23ef56a8df82c1e5d2 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 13 Jan 2023 10:09:32 +0100 Subject: HID: bpf: rework how programs are attached and stored in the kernel Previously, HID-BPF was relying on a bpf tracing program to be notified when a program was released from userspace. This is error prone, as LLVM sometimes inline the function and sometimes not. So instead of messing up with the bpf prog ref count, we can use the bpf_link concept which actually matches exactly what we want: - a bpf_link represents the fact that a given program is attached to a given HID device - as long as the bpf_link has fd opened (either by the userspace program still being around or by pinning the bpf object in the bpffs), the program stays attached to the HID device - once every user has closed the fd, we get called by hid_bpf_link_release() that we no longer have any users, and we can disconnect the program to the device in 2 passes: first atomically clear the bit saying that the link is active, and then calling release_work in a scheduled work item. This solves entirely the problems of BPF tracing not showing up and is definitely cleaner. Signed-off-by: Benjamin Tissoires Acked-by: Alexei Starovoitov Signed-off-by: Jiri Kosina --- include/linux/hid_bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 3ca85ab91325..e9afb61e6ee0 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -3,6 +3,7 @@ #ifndef __HID_BPF_H #define __HID_BPF_H +#include #include #include @@ -138,6 +139,12 @@ struct hid_bpf { spinlock_t progs_lock; /* protects RCU update of progs */ }; +/* specific HID-BPF link when a program is attached to a device */ +struct hid_bpf_link { + struct bpf_link link; + int hid_table_index; +}; + #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt); -- cgit v1.2.3 From 04a42e72d77a93a166b79c34b7bc862f55a53967 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 14 Dec 2022 22:17:57 -0800 Subject: mm: move folio_set_compound_order() to mm/internal.h folio_set_compound_order() is moved to an mm-internal location so external folio users cannot misuse this function. Change the name of the function to folio_set_order() and use WARN_ON_ONCE() rather than BUG_ON. Also, handle the case if a non-large folio is passed and add clarifying comments to the function. Link: https://lore.kernel.org/lkml/20221207223731.32784-1-sidhartha.kumar@oracle.com/T/ Link: https://lkml.kernel.org/r/20221215061757.223440-1-sidhartha.kumar@oracle.com Fixes: 9fd330582b2f ("mm: add folio dtor and order setter functions") Signed-off-by: Sidhartha Kumar Suggested-by: Mike Kravetz Suggested-by: Muchun Song Suggested-by: Matthew Wilcox Suggested-by: John Hubbard Reviewed-by: John Hubbard Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f857163ac89..253b2d7489e6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1019,22 +1019,6 @@ static inline void set_compound_order(struct page *page, unsigned int order) #endif } -/* - * folio_set_compound_order is generally passed a non-zero order to - * initialize a large folio. However, hugetlb code abuses this by - * passing in zero when 'dissolving' a large folio. - */ -static inline void folio_set_compound_order(struct folio *folio, - unsigned int order) -{ - VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); - - folio->_folio_order = order; -#ifdef CONFIG_64BIT - folio->_folio_nr_pages = order ? 1U << order : 0; -#endif -} - /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { -- cgit v1.2.3 From 105ff5339f498af74e60d7662c8f1c4d21f1342d Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Thu, 15 Dec 2022 00:12:03 +0000 Subject: mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC The new MFD_NOEXEC_SEAL and MFD_EXEC flags allows application to set executable bit at creation time (memfd_create). When MFD_NOEXEC_SEAL is set, memfd is created without executable bit (mode:0666), and sealed with F_SEAL_EXEC, so it can't be chmod to be executable (mode: 0777) after creation. when MFD_EXEC flag is set, memfd is created with executable bit (mode:0777), this is the same as the old behavior of memfd_create. The new pid namespaced sysctl vm.memfd_noexec has 3 values: 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set. 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_NOEXEC_SEAL was set. 2: memfd_create() without MFD_NOEXEC_SEAL will be rejected. The sysctl allows finer control of memfd_create for old-software that doesn't set the executable bit, for example, a container with vm.memfd_noexec=1 means the old-software will create non-executable memfd by default. Also, the value of memfd_noexec is passed to child namespace at creation time. For example, if the init namespace has vm.memfd_noexec=2, all its children namespaces will be created with 2. [akpm@linux-foundation.org: add stub functions to fix build] [akpm@linux-foundation.org: remove unneeded register_pid_ns_ctl_table_vm() stub, per Jeff] [akpm@linux-foundation.org: s/pr_warn_ratelimited/pr_warn_once/, per review] [akpm@linux-foundation.org: fix CONFIG_SYSCTL=n warning] Link: https://lkml.kernel.org/r/20221215001205.51969-4-jeffxu@google.com Signed-off-by: Jeff Xu Co-developed-by: Daniel Verkamp Signed-off-by: Daniel Verkamp Reported-by: kernel test robot Reviewed-by: Kees Cook Cc: David Herrmann Cc: Dmitry Torokhov Cc: Hugh Dickins Cc: Jann Horn Cc: Jorge Lucangeli Obes Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/pid_namespace.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 07481bb87d4e..c758809d5bcf 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -16,6 +16,21 @@ struct fs_pin; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) +/* + * sysctl for vm.memfd_noexec + * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL + * acts like MFD_EXEC was set. + * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL + * acts like MFD_NOEXEC_SEAL was set. + * 2: memfd_create() without MFD_NOEXEC_SEAL will be + * rejected. + */ +#define MEMFD_NOEXEC_SCOPE_EXEC 0 +#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 +#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 +#endif + struct pid_namespace { struct idr idr; struct rcu_head rcu; @@ -31,6 +46,10 @@ struct pid_namespace { struct ucounts *ucounts; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) + /* sysctl for vm.memfd_noexec */ + int memfd_noexec_scope; +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; -- cgit v1.2.3 From fe7d4c6d5a42f5bdc63fdfdca2cad32c8a779e23 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:50:54 -0500 Subject: mm/hugetlb: document huge_pte_offset usage huge_pte_offset() is potentially a pgtable walker, looking up pte_t* for a hugetlb address. Normally, it's always safe to walk a generic pgtable as long as we're with the mmap lock held for either read or write, because that guarantees the pgtable pages will always be valid during the process. But it's not true for hugetlbfs, especially shared: hugetlbfs can have its pgtable freed by pmd unsharing, it means that even with mmap lock held for current mm, the PMD pgtable page can still go away from under us if pmd unsharing is possible during the walk. So we have two ways to make it safe even for a shared mapping: (1) If we're with the hugetlb vma lock held for either read/write, it's okay because pmd unshare cannot happen at all. (2) If we're with the i_mmap_rwsem lock held for either read/write, it's okay because even if pmd unshare can happen, the pgtable page cannot be freed from under us. Document it. Link: https://lkml.kernel.org/r/20221216155100.2043537-4-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 551834cd5299..d755e2a7c0db 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -192,6 +192,38 @@ extern struct list_head huge_boot_pages; pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); +/* + * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. + * Returns the pte_t* if found, or NULL if the address is not mapped. + * + * Since this function will walk all the pgtable pages (including not only + * high-level pgtable page, but also PUD entry that can be unshared + * concurrently for VM_SHARED), the caller of this function should be + * responsible of its thread safety. One can follow this rule: + * + * (1) For private mappings: pmd unsharing is not possible, so holding the + * mmap_lock for either read or write is sufficient. Most callers + * already hold the mmap_lock, so normally, no special action is + * required. + * + * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged + * pgtable page can go away from under us! It can be done by a pmd + * unshare with a follow up munmap() on the other process), then we + * need either: + * + * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare + * won't happen upon the range (it also makes sure the pte_t we + * read is the right and stable one), or, + * + * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make + * sure even if unshare happened the racy unmap() will wait until + * i_mmap_rwsem is released. + * + * Option (2.1) is the safest, which guarantees pte stability from pmd + * sharing pov, until the vma lock released. Option (2.2) doesn't protect + * a concurrent pmd unshare, but it makes sure the pgtable page is safe to + * access. + */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); unsigned long hugetlb_mask_last_page(struct hstate *h); -- cgit v1.2.3 From fcd48540d188876c917a377d81cd24c100332a62 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:50:55 -0500 Subject: mm/hugetlb: move swap entry handling into vma lock when faulted In hugetlb_fault(), there used to have a special path to handle swap entry at the entrance using huge_pte_offset(). That's unsafe because huge_pte_offset() for a pmd sharable range can access freed pgtables if without any lock to protect the pgtable from being freed after pmd unshare. Here the simplest solution to make it safe is to move the swap handling to be after the vma lock being held. We may need to take the fault mutex on either migration or hwpoison entries now (also the vma lock, but that's really needed), however neither of them is hot path. Note that the vma lock cannot be released in hugetlb_fault() when the migration entry is detected, because in migration_entry_wait_huge() the pgtable page will be used again (by taking the pgtable lock), so that also need to be protected by the vma lock. Modify migration_entry_wait_huge() so that it must be called with vma read lock held, and properly release the lock in __migration_entry_wait_huge(). Link: https://lkml.kernel.org/r/20221216155100.2043537-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/swapops.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index b982dd614572..3a451b7afcb3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -337,7 +337,8 @@ extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); #ifdef CONFIG_HUGETLB_PAGE -extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl); +extern void __migration_entry_wait_huge(struct vm_area_struct *vma, + pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); #endif /* CONFIG_HUGETLB_PAGE */ #else /* CONFIG_MIGRATION */ @@ -366,7 +367,8 @@ static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } #ifdef CONFIG_HUGETLB_PAGE -static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { } +static inline void __migration_entry_wait_huge(struct vm_area_struct *vma, + pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { } #endif /* CONFIG_HUGETLB_PAGE */ static inline int is_writable_migration_entry(swp_entry_t entry) -- cgit v1.2.3 From dd361e5033cf36c51acab996ea17748b81cedb38 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:52:26 -0500 Subject: mm/hugetlb: make walk_hugetlb_range() safe to pmd unshare Since walk_hugetlb_range() walks the pgtable, it needs the vma lock to make sure the pgtable page will not be freed concurrently. Link: https://lkml.kernel.org/r/20221216155226.2043738-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 959f52e5867d..27a6df448ee5 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -21,7 +21,16 @@ struct mm_walk; * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. * Any folded depths (where PTRS_PER_P?D is equal to 1) * are skipped. - * @hugetlb_entry: if set, called for each hugetlb entry + * @hugetlb_entry: if set, called for each hugetlb entry. This hook + * function is called with the vma lock held, in order to + * protect against a concurrent freeing of the pte_t* or + * the ptl. In some cases, the hook function needs to drop + * and retake the vma lock in order to avoid deadlocks + * while calling other functions. In such cases the hook + * function must either refrain from accessing the pte or + * ptl after dropping the vma lock, or else revalidate + * those items after re-acquiring the vma lock and before + * accessing them. * @test_walk: caller specific callback function to determine whether * we walk over the current vma or not. Returning 0 means * "do page table walk over the current vma", returning -- cgit v1.2.3 From 9c67a20704e763f9cb8cd262c3e45de7bd2816bc Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:52:29 -0500 Subject: mm/hugetlb: introduce hugetlb_walk() huge_pte_offset() is the main walker function for hugetlb pgtables. The name is not really representing what it does, though. Instead of renaming it, introduce a wrapper function called hugetlb_walk() which will use huge_pte_offset() inside. Assert on the locks when walking the pgtable. Note, the vma lock assertion will be a no-op for private mappings. Document the last special case in the page_vma_mapped_walk() path where we don't need any more lock to call hugetlb_walk(). Taking vma lock there is not needed because either: (1) potential callers of hugetlb pvmw holds i_mmap_rwsem already (from one rmap_walk()), or (2) the caller will not walk a hugetlb vma at all so the hugetlb code path not reachable (e.g. in ksm or uprobe paths). It's slightly implicit for future page_vma_mapped_walk() callers on that lock requirement. But anyway, when one day this rule breaks, one will get a straightforward warning in hugetlb_walk() with lockdep, then there'll be a way out. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20221216155229.2043750-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d755e2a7c0db..b6b10101bea7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -2,6 +2,7 @@ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H +#include #include #include #include @@ -196,6 +197,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. * Returns the pte_t* if found, or NULL if the address is not mapped. * + * IMPORTANT: we should normally not directly call this function, instead + * this is only a common interface to implement arch-specific + * walker. Please use hugetlb_walk() instead, because that will attempt to + * verify the locking for you. + * * Since this function will walk all the pgtable pages (including not only * high-level pgtable page, but also PUD entry that can be unshared * concurrently for VM_SHARED), the caller of this function should be @@ -1229,4 +1235,35 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #endif +static inline bool __vma_shareable_lock(struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; +} + +/* + * Safe version of huge_pte_offset() to check the locks. See comments + * above huge_pte_offset(). + */ +static inline pte_t * +hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) +{ +#if defined(CONFIG_HUGETLB_PAGE) && \ + defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + /* + * If pmd sharing possible, locking needed to safely walk the + * hugetlb pgtables. More information can be found at the comment + * above huge_pte_offset() in the same file. + * + * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. + */ + if (__vma_shareable_lock(vma)) + WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && + !lockdep_is_held( + &vma->vm_file->f_mapping->i_mmap_rwsem)); +#endif + return huge_pte_offset(vma->vm_mm, addr, sz); +} + #endif /* _LINUX_HUGETLB_H */ -- cgit v1.2.3 From d685c668b0695dff927c85e27ef27d4f404f16a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Dec 2022 21:43:51 +0000 Subject: buffer: add b_folio as an alias of b_page Patch series "Start converting buffer_heads to use folios". I was hoping that filesystems would convert from buffer_heads to iomap, but that's not happening particularly quickly. So the buffer_head infrastructure needs to be converted from being page-based to being folio-based. This patch (of 12): Buffer heads point to the allocation (ie the folio), not the page. This is currently the same thing for all filesystems that use buffer heads, so this is a safe transitional step. Link: https://lkml.kernel.org/r/20221215214402.3522366-1-willy@infradead.org Link: https://lkml.kernel.org/r/20221215214402.3522366-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jan Kara Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 33fa5e94aa80..8f14dca5fed7 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -61,7 +61,10 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); struct buffer_head { unsigned long b_state; /* buffer state bitmap (see above) */ struct buffer_head *b_this_page;/* circular list of page's buffers */ - struct page *b_page; /* the page this bh is mapped to */ + union { + struct page *b_page; /* the page this bh is mapped to */ + struct folio *b_folio; /* the folio this bh is mapped to */ + }; sector_t b_blocknr; /* start block number */ size_t b_size; /* size of mapping */ -- cgit v1.2.3 From 6a6fe9ebd571a4092b7d5c1f11e4e1e15d296fa5 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 9 Dec 2022 10:06:18 +0800 Subject: mm: swap: convert mark_page_lazyfree() to folio_mark_lazyfree() mark_page_lazyfree() and the callers are converted to use folio, this rename and make it to take in a folio argument instead of calling page_folio(). Link: https://lkml.kernel.org/r/20221209020618.190306-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Vishal Moola (Oracle) Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2787b84eaf12..93f1cebd8545 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -402,7 +402,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void deactivate_page(struct page *page); -extern void mark_page_lazyfree(struct page *page); +void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); extern void lru_cache_add_inactive_or_unevictable(struct page *page, -- cgit v1.2.3 From 98def236f63c66629fb6b2d4b69cecffc5b46539 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 5 Dec 2022 23:08:20 +0000 Subject: mm/damon/core: implement damos filter Patch series "implement DAMOS filtering for anon pages and/or specific memory cgroups" DAMOS let users do system operations in a data access pattern oriented way. The data access pattern, which is extracted by DAMON, is somewhat accurate more than what user space could know in many cases. However, in some situation, users could know something more than the kernel about the pattern or some special requirements for some types of memory or processes. For example, some users would have slow swap devices and knows latency-ciritical processes and therefore want to use DAMON-based proactive reclamation (DAMON_RECLAIM) for only non-anonymous pages of non-latency-critical processes. For such restriction, users could exclude the memory regions from the initial monitoring regions and use non-dynamic monitoring regions update monitoring operations set including fvaddr and paddr. They could also adjust the DAMOS target access pattern. For dynamically changing memory layout and access pattern, those would be not enough. To help the case, add an interface, namely DAMOS filters, which can be used to avoid the DAMOS actions be applied to specific types of memory, to DAMON kernel API (damon.h). At the moment, it supports filtering anonymous pages and/or specific memory cgroups in or out for each DAMOS scheme. This patchset adds the support for all DAMOS actions that 'paddr' monitoring operations set supports ('pageout', 'lru_prio', and 'lru_deprio'), and the functionality is exposed via DAMON kernel API (damon.h) the DAMON sysfs interface (/sys/kernel/mm/damon/admins/), and DAMON_RECLAIM module parameters. Patches Sequence ---------------- First patch implements DAMOS filter interface to DAMON kernel API. Second patch makes the physical address space monitoring operations set to support the filters from all supporting DAMOS actions. Third patch adds anonymous pages filter support to DAMON_RECLAIM, and the fourth patch documents the DAMON_RECLAIM's new feature. Fifth to seventh patches implement DAMON sysfs files for support of the filters, and eighth patch connects the file to use DAMOS filters feature. Ninth patch adds simple self test cases for DAMOS filters of the sysfs interface. Finally, following two patches (tenth and eleventh) document the new features and interfaces. This patch (of 11): DAMOS lets users do system operation in a data access pattern oriented way. The data access pattern, which is extracted by DAMON, is somewhat accurate more than what user space could know in many cases. However, in some situation, users could know something more than the kernel about the pattern or some special requirements for some types of memory or processes. For example, some users would have slow swap devices and knows latency-ciritical processes and therefore want to use DAMON-based proactive reclamation (DAMON_RECLAIM) for only non-anonymous pages of non-latency-critical processes. For such restriction, users could exclude the memory regions from the initial monitoring regions and use non-dynamic monitoring regions update monitoring operations set including fvaddr and paddr. They could also adjust the DAMOS target access pattern. For dynamically changing memory layout and access pattern, those would be not enough. To help the case, add an interface, namely DAMOS filters, which can be used to avoid the DAMOS actions be applied to specific types of memory, to DAMON kernel API (damon.h). At the moment, it supports filtering anonymous pages and/or specific memory cgroups in or out for each DAMOS scheme. Note that this commit adds only the interface to the DAMON kernel API. The impelmentation should be made in the monitoring operations sets, and following commits will add that. Link: https://lkml.kernel.org/r/20221205230830.144349-1-sj@kernel.org Link: https://lkml.kernel.org/r/20221205230830.144349-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index ad15a5b88e3a..7907918ad2e0 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -8,6 +8,7 @@ #ifndef _DAMON_H_ #define _DAMON_H_ +#include #include #include #include @@ -215,6 +216,39 @@ struct damos_stat { unsigned long qt_exceeds; }; +/** + * enum damos_filter_type - Type of memory for &struct damos_filter + * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. + * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. + * @NR_DAMOS_FILTER_TYPES: Number of filter types. + */ +enum damos_filter_type { + DAMOS_FILTER_TYPE_ANON, + DAMOS_FILTER_TYPE_MEMCG, + NR_DAMOS_FILTER_TYPES, +}; + +/** + * struct damos_filter - DAMOS action target memory filter. + * @type: Type of the page. + * @matching: If the matching page should filtered out or in. + * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. + * @list: List head for siblings. + * + * Before applying the &damos->action to a memory region, DAMOS checks if each + * page of the region matches to this and avoid applying the action if so. + * Note that the check support is up to &struct damon_operations + * implementation. + */ +struct damos_filter { + enum damos_filter_type type; + bool matching; + union { + unsigned short memcg_id; + }; + struct list_head list; +}; + /** * struct damos_access_pattern - Target access pattern of the given scheme. * @min_sz_region: Minimum size of target regions. @@ -239,6 +273,7 @@ struct damos_access_pattern { * @action: &damo_action to be applied to the target regions. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. + * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. * @@ -254,6 +289,10 @@ struct damos_access_pattern { * If all schemes that registered to a &struct damon_ctx are inactive, DAMON * stops monitoring and just repeatedly checks the watermarks. * + * Before applying the &action to a memory region, &struct damon_operations + * implementation could check pages of the region and skip &action to respect + * &filters + * * After applying the &action to each region, &stat_count and &stat_sz is * updated to reflect the number of regions and total size of regions that the * &action is applied. @@ -263,6 +302,7 @@ struct damos { enum damos_action action; struct damos_quota quota; struct damos_watermarks wmarks; + struct list_head filters; struct damos_stat stat; struct list_head list; }; @@ -516,6 +556,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damon_for_each_scheme_safe(s, next, ctx) \ list_for_each_entry_safe(s, next, &(ctx)->schemes, list) +#define damos_for_each_filter(f, scheme) \ + list_for_each_entry(f, &(scheme)->filters, list) + +#define damos_for_each_filter_safe(f, next, scheme) \ + list_for_each_entry_safe(f, next, &(scheme)->filters, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); @@ -536,6 +582,11 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +struct damos_filter *damos_new_filter(enum damos_filter_type type, + bool matching); +void damos_add_filter(struct damos *s, struct damos_filter *f); +void damos_destroy_filter(struct damos_filter *f); + struct damos *damon_new_scheme(struct damos_access_pattern *pattern, enum damos_action action, struct damos_quota *quota, struct damos_watermarks *wmarks); -- cgit v1.2.3 From 44383cef54c0ce1201f884d83cc2b367bc5aa4f7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 19 Dec 2022 19:09:18 +0100 Subject: kasan: allow sampling page_alloc allocations for HW_TAGS As Hardware Tag-Based KASAN is intended to be used in production, its performance impact is crucial. As page_alloc allocations tend to be big, tagging and checking all such allocations can introduce a significant slowdown. Add two new boot parameters that allow to alleviate that slowdown: - kasan.page_alloc.sample, which makes Hardware Tag-Based KASAN tag only every Nth page_alloc allocation with the order configured by the second added parameter (default: tag every such allocation). - kasan.page_alloc.sample.order, which makes sampling enabled by the first parameter only affect page_alloc allocations with the order equal or greater than the specified value (default: 3, see below). The exact performance improvement caused by using the new parameters depends on their values and the applied workload. The chosen default value for kasan.page_alloc.sample.order is 3, which matches both PAGE_ALLOC_COSTLY_ORDER and SKB_FRAG_PAGE_ORDER. This is done for two reasons: 1. PAGE_ALLOC_COSTLY_ORDER is "the order at which allocations are deemed costly to service", which corresponds to the idea that only large and thus costly allocations are supposed to sampled. 2. One of the workloads targeted by this patch is a benchmark that sends a large amount of data over a local loopback connection. Most multi-page data allocations in the networking subsystem have the order of SKB_FRAG_PAGE_ORDER (or PAGE_ALLOC_COSTLY_ORDER). When running a local loopback test on a testing MTE-enabled device in sync mode, enabling Hardware Tag-Based KASAN introduces a ~50% slowdown. Applying this patch and setting kasan.page_alloc.sampling to a value higher than 1 allows to lower the slowdown. The performance improvement saturates around the sampling interval value of 10 with the default sampling page order of 3. This lowers the slowdown to ~20%. The slowdown in real scenarios involving the network will likely be better. Enabling page_alloc sampling has a downside: KASAN misses bad accesses to a page_alloc allocation that has not been tagged. This lowers the value of KASAN as a security mitigation. However, based on measuring the number of page_alloc allocations of different orders during boot in a test build, sampling with the default kasan.page_alloc.sample.order value affects only ~7% of allocations. The rest ~93% of allocations are still checked deterministically. Link: https://lkml.kernel.org/r/129da0614123bb85ed4dd61ae30842b2dd7c903f.1671471846.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Jann Horn Cc: Mark Brand Cc: Peter Collingbourne Signed-off-by: Andrew Morton --- include/linux/kasan.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 96c9d56e5510..5ebbaf672009 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -120,12 +120,13 @@ static __always_inline void kasan_poison_pages(struct page *page, __kasan_poison_pages(page, order, init); } -void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init); -static __always_inline void kasan_unpoison_pages(struct page *page, +bool __kasan_unpoison_pages(struct page *page, unsigned int order, bool init); +static __always_inline bool kasan_unpoison_pages(struct page *page, unsigned int order, bool init) { if (kasan_enabled()) - __kasan_unpoison_pages(page, order, init); + return __kasan_unpoison_pages(page, order, init); + return false; } void __kasan_cache_create_kmalloc(struct kmem_cache *cache); @@ -249,8 +250,11 @@ static __always_inline bool kasan_check_byte(const void *addr) static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_poison_pages(struct page *page, unsigned int order, bool init) {} -static inline void kasan_unpoison_pages(struct page *page, unsigned int order, - bool init) {} +static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, + bool init) +{ + return false; +} static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, -- cgit v1.2.3 From 831978e37e93bd3e36612917a4b193278950daff Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:52 +0800 Subject: maple_tree: remove extra space and blank line Patch series "Clean up and refinement for maple tree", v2. This patchset cleans up and refines some maple tree code. A few small changes make the code easier to understand and for better readability. This patch (of 7): These extra space and blank lines are unnecessary, so drop them. Link: https://lkml.kernel.org/r/20221221060058.609003-1-vernon2gm@gmail.com Link: https://lkml.kernel.org/r/20221221060058.609003-2-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e594db58a0f1..4ee5a969441c 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -517,7 +517,6 @@ static inline void mas_reset(struct ma_state *mas) * entry. * * Note: may return the zero entry. - * */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) @@ -639,7 +638,6 @@ static inline void mt_set_in_rcu(struct maple_tree *mt) } static inline unsigned int mt_height(const struct maple_tree *mt) - { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } -- cgit v1.2.3 From eabb305293835b191ffe60234587ae8bf5e4e9fd Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:56 +0800 Subject: maple_tree: remove the redundant code The macros CONFIG_DEBUG_MAPLE_TREE_VERBOSE no one uses, functions mas_dup_tree() and mas_dup_store() are not implemented, just function declaration, so drop it. Link: https://lkml.kernel.org/r/20221221060058.609003-6-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 4ee5a969441c..815a27661517 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -12,7 +12,6 @@ #include #include /* #define CONFIG_MAPLE_RCU_DISABLED */ -/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */ /* * Allocated nodes are mutable until they have been inserted into the tree, @@ -483,9 +482,6 @@ static inline bool mas_is_paused(struct ma_state *mas) return mas->node == MAS_PAUSE; } -void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas); -void mas_dup_store(struct ma_state *mas, void *entry); - /* * This finds an empty area from the highest address to the lowest. * AKA "Topdown" version, -- cgit v1.2.3 From 318e9342fbbb6888d903d86e83865609901a1c65 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 21 Dec 2022 10:08:45 -0800 Subject: mm/memory: add vm_normal_folio() Patch series "Convert deactivate_page() to folio_deactivate()", v4. Deactivate_page() has already been converted to use folios. This patch series modifies the callers of deactivate_page() to use folios. It also introduces vm_normal_folio() to assist with folio conversions, and converts deactivate_page() to folio_deactivate() which takes in a folio. This patch (of 4): Introduce a wrapper function called vm_normal_folio(). This function calls vm_normal_page() and returns the folio of the page found, or null if no page is found. This function allows callers to get a folio from a pte, which will eventually allow them to completely replace their struct page variables with struct folio instead. Link: https://lkml.kernel.org/r/20221221180848.20774-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20221221180848.20774-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 253b2d7489e6..8e14183dfc58 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1968,6 +1968,8 @@ static inline bool can_do_mlock(void) { return false; } extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); +struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, -- cgit v1.2.3 From 5a9e34747c9f731bbb6b7fd7521c4fec0d840593 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 21 Dec 2022 10:08:48 -0800 Subject: mm/swap: convert deactivate_page() to folio_deactivate() Deactivate_page() has already been converted to use folios, this change converts it to take in a folio argument instead of calling page_folio(). It also renames the function folio_deactivate() to be more consistent with other folio functions. [akpm@linux-foundation.org: fix left-over comments, per Yu Zhao] Link: https://lkml.kernel.org/r/20221221180848.20774-5-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 93f1cebd8545..87cecb8c0bdc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -401,7 +401,7 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void deactivate_page(struct page *page); +void folio_deactivate(struct folio *folio); void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); -- cgit v1.2.3 From 0b7b8704ddcee372099a2bc6781db6ab273a85d5 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Wed, 21 Dec 2022 22:42:45 +0800 Subject: mm: new primitive kvmemdup() Similar to kmemdup(), but support large amount of bytes with kvmalloc() and does *not* guarantee that the result will be physically contiguous. Use only in cases where kvmalloc() is needed and free it with kvfree(). Also adapt policy_unpack.c in case someone bisect into this. Link: https://lkml.kernel.org/r/20221221144245.27164-1-sunhao.th@gmail.com Signed-off-by: Hao Sun Suggested-by: Daniel Borkmann Cc: Nick Terrell Cc: John Johansen Cc: Paul Moore Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index db28802ab0a6..c062c581a98b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -177,6 +177,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); +extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); -- cgit v1.2.3 From b5054174ac7c7d8fae15deee7ddc0e20fd604f30 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Dec 2022 21:24:54 +0000 Subject: mm: move FOLL_* defs to mm_types.h Move FOLL_* definitions to linux/mm_types.h to make them more accessible without having to drag in all of linux/mm.h and everything that drags in too[1]. Link: https://lkml.kernel.org/r/2161258.1671657894@warthog.procyon.org.uk Signed-off-by: David Howells Suggested-by: Matthew Wilcox Reviewed-by: John Hubbard Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton --- include/linux/mm.h | 75 ------------------------------------------------ include/linux/mm_types.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e14183dfc58..d68579bf8484 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3057,81 +3057,6 @@ static inline vm_fault_t vmf_error(int err) struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags); -#define FOLL_WRITE 0x01 /* check pte is writable */ -#define FOLL_TOUCH 0x02 /* mark page accessed */ -#define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ -#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ -#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO - * and return without waiting upon it */ -#define FOLL_NOFAULT 0x80 /* do not fault in pages */ -#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ -#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_ANON 0x8000 /* don't do file mappings */ -#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ -#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ -#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ -#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ -#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ -#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ - -/* - * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each - * other. Here is what they mean, and how to use them: - * - * FOLL_LONGTERM indicates that the page will be held for an indefinite time - * period _often_ under userspace control. This is in contrast to - * iov_iter_get_pages(), whose usages are transient. - * - * FIXME: For pages which are part of a filesystem, mappings are subject to the - * lifetime enforced by the filesystem and we need guarantees that longterm - * users like RDMA and V4L2 only establish mappings which coordinate usage with - * the filesystem. Ideas for this coordination include revoking the longterm - * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was - * added after the problem with filesystems was found FS DAX VMAs are - * specifically failed. Filesystem pages are still subject to bugs and use of - * FOLL_LONGTERM should be avoided on those pages. - * - * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. - * Currently only get_user_pages() and get_user_pages_fast() support this flag - * and calls to get_user_pages_[un]locked are specifically not allowed. This - * is due to an incompatibility with the FS DAX check and - * FAULT_FLAG_ALLOW_RETRY. - * - * In the CMA case: long term pins in a CMA region would unnecessarily fragment - * that region. And so, CMA attempts to migrate the page before pinning, when - * FOLL_LONGTERM is specified. - * - * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, - * but an additional pin counting system) will be invoked. This is intended for - * anything that gets a page reference and then touches page data (for example, - * Direct IO). This lets the filesystem know that some non-file-system entity is - * potentially changing the pages' data. In contrast to FOLL_GET (whose pages - * are released via put_page()), FOLL_PIN pages must be released, ultimately, by - * a call to unpin_user_page(). - * - * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different - * and separate refcounting mechanisms, however, and that means that each has - * its own acquire and release mechanisms: - * - * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. - * - * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. - * - * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. - * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based - * calls applied to them, and that's perfectly OK. This is a constraint on the - * callers, not on the pages.) - * - * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never - * directly by the caller. That's in order to help avoid mismatches when - * releasing pages: get_user_pages*() pages must be released via put_page(), - * while pin_user_pages*() pages must be released via unpin_user_page(). - * - * Please see Documentation/core-api/pin_user_pages.rst for more information. - */ - static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9757067c3053..1118e381fcdc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1085,4 +1085,79 @@ enum fault_flag { typedef unsigned int __bitwise zap_flags_t; +/* + * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each + * other. Here is what they mean, and how to use them: + * + * FOLL_LONGTERM indicates that the page will be held for an indefinite time + * period _often_ under userspace control. This is in contrast to + * iov_iter_get_pages(), whose usages are transient. + * + * FIXME: For pages which are part of a filesystem, mappings are subject to the + * lifetime enforced by the filesystem and we need guarantees that longterm + * users like RDMA and V4L2 only establish mappings which coordinate usage with + * the filesystem. Ideas for this coordination include revoking the longterm + * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was + * added after the problem with filesystems was found FS DAX VMAs are + * specifically failed. Filesystem pages are still subject to bugs and use of + * FOLL_LONGTERM should be avoided on those pages. + * + * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. + * Currently only get_user_pages() and get_user_pages_fast() support this flag + * and calls to get_user_pages_[un]locked are specifically not allowed. This + * is due to an incompatibility with the FS DAX check and + * FAULT_FLAG_ALLOW_RETRY. + * + * In the CMA case: long term pins in a CMA region would unnecessarily fragment + * that region. And so, CMA attempts to migrate the page before pinning, when + * FOLL_LONGTERM is specified. + * + * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, + * but an additional pin counting system) will be invoked. This is intended for + * anything that gets a page reference and then touches page data (for example, + * Direct IO). This lets the filesystem know that some non-file-system entity is + * potentially changing the pages' data. In contrast to FOLL_GET (whose pages + * are released via put_page()), FOLL_PIN pages must be released, ultimately, by + * a call to unpin_user_page(). + * + * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different + * and separate refcounting mechanisms, however, and that means that each has + * its own acquire and release mechanisms: + * + * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. + * + * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. + * + * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. + * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based + * calls applied to them, and that's perfectly OK. This is a constraint on the + * callers, not on the pages.) + * + * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never + * directly by the caller. That's in order to help avoid mismatches when + * releasing pages: get_user_pages*() pages must be released via put_page(), + * while pin_user_pages*() pages must be released via unpin_user_page(). + * + * Please see Documentation/core-api/pin_user_pages.rst for more information. + */ + +#define FOLL_WRITE 0x01 /* check pte is writable */ +#define FOLL_TOUCH 0x02 /* mark page accessed */ +#define FOLL_GET 0x04 /* do get_page on page */ +#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ +#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO + * and return without waiting upon it */ +#define FOLL_NOFAULT 0x80 /* do not fault in pages */ +#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ +#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ +#define FOLL_ANON 0x8000 /* don't do file mappings */ +#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ +#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ +#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ +#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ +#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ +#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ + #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 391655fe08d1f942359a11148aa9aaf3f99d6d6f Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:18:59 -0700 Subject: mm: multi-gen LRU: rename lru_gen_struct to lru_gen_folio Patch series "mm: multi-gen LRU: memcg LRU", v3. Overview ======== An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs, since each node and memcg combination has an LRU of folios (see mem_cgroup_lruvec()). Its goal is to improve the scalability of global reclaim, which is critical to system-wide memory overcommit in data centers. Note that memcg reclaim is currently out of scope. Its memory bloat is a pointer to each lruvec and negligible to each pglist_data. In terms of traversing memcgs during global reclaim, it improves the best-case complexity from O(n) to O(1) and does not affect the worst-case complexity O(n). Therefore, on average, it has a sublinear complexity in contrast to the current linear complexity. The basic structure of an memcg LRU can be understood by an analogy to the active/inactive LRU (of folios): 1. It has the young and the old (generations), i.e., the counterparts to the active and the inactive; 2. The increment of max_seq triggers promotion, i.e., the counterpart to activation; 3. Other events trigger similar operations, e.g., offlining an memcg triggers demotion, i.e., the counterpart to deactivation. In terms of global reclaim, it has two distinct features: 1. Sharding, which allows each thread to start at a random memcg (in the old generation) and improves parallelism; 2. Eventual fairness, which allows direct reclaim to bail out at will and reduces latency without affecting fairness over some time. The commit message in patch 6 details the workflow: https://lore.kernel.org/r/20221222041905.2431096-7-yuzhao@google.com/ The following is a simple test to quickly verify its effectiveness. Test design: 1. Create multiple memcgs. 2. Each memcg contains a job (fio). 3. All jobs access the same amount of memory randomly. 4. The system does not experience global memory pressure. 5. Periodically write to the root memory.reclaim. Desired outcome: 1. All memcgs have similar pgsteal counts, i.e., stddev(pgsteal) over mean(pgsteal) is close to 0%. 2. The total pgsteal is close to the total requested through memory.reclaim, i.e., sum(pgsteal) over sum(requested) is close to 100%. Actual outcome [1]: MGLRU off MGLRU on stddev(pgsteal) / mean(pgsteal) 75% 20% sum(pgsteal) / sum(requested) 425% 95% #################################################################### MEMCGS=128 for ((memcg = 0; memcg < $MEMCGS; memcg++)); do mkdir /sys/fs/cgroup/memcg$memcg done start() { echo $BASHPID > /sys/fs/cgroup/memcg$memcg/cgroup.procs fio -name=memcg$memcg --numjobs=1 --ioengine=mmap \ --filename=/dev/zero --size=1920M --rw=randrw \ --rate=64m,64m --random_distribution=random \ --fadvise_hint=0 --time_based --runtime=10h \ --group_reporting --minimal } for ((memcg = 0; memcg < $MEMCGS; memcg++)); do start & done sleep 600 for ((i = 0; i < 600; i++)); do echo 256m >/sys/fs/cgroup/memory.reclaim sleep 6 done for ((memcg = 0; memcg < $MEMCGS; memcg++)); do grep "pgsteal " /sys/fs/cgroup/memcg$memcg/memory.stat done #################################################################### [1]: This was obtained from running the above script (touches less than 256GB memory) on an EPYC 7B13 with 512GB DRAM for over an hour. This patch (of 8): The new name lru_gen_folio will be more distinct from the coming lru_gen_memcg. Link: https://lkml.kernel.org/r/20221222041905.2431096-1-yuzhao@google.com Link: https://lkml.kernel.org/r/20221222041905.2431096-2-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 4 ++-- include/linux/mmzone.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index ff3f3f23f649..177b8b1dd43c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -178,7 +178,7 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli int zone = folio_zonenum(folio); int delta = folio_nr_pages(folio); enum lru_list lru = type * LRU_INACTIVE_FILE; - struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); @@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); - struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cd28a100d9e4..1686fcc4ed01 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -404,7 +404,7 @@ enum { * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. */ -struct lru_gen_struct { +struct lru_gen_folio { /* the aging increments the youngest generation number */ unsigned long max_seq; /* the eviction increments the oldest generation numbers */ @@ -461,7 +461,7 @@ struct lru_gen_mm_state { struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; - /* unstable max_seq from lru_gen_struct */ + /* unstable max_seq from lru_gen_folio */ unsigned long max_seq; /* the next address within an mm to scan */ unsigned long next_addr; @@ -524,7 +524,7 @@ struct lruvec { unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ - struct lru_gen_struct lrugen; + struct lru_gen_folio lrugen; /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif -- cgit v1.2.3 From 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:00 -0700 Subject: mm: multi-gen LRU: rename lrugen->lists[] to lrugen->folios[] lru_gen_folio will be chained into per-node lists by the coming lrugen->list. Link: https://lkml.kernel.org/r/20221222041905.2431096-3-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 4 ++-- include/linux/mmzone.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 177b8b1dd43c..eb8a2435ee80 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ if (reclaiming) - list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]); else - list_add(&folio->lru, &lrugen->lists[gen][type][zone]); + list_add(&folio->lru, &lrugen->folios[gen][type][zone]); return true; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1686fcc4ed01..6c96ee823dbd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -312,7 +312,7 @@ enum lruvec_flags { * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while - * a page is on one of lrugen->lists[]. Otherwise it stores 0. + * a page is on one of lrugen->folios[]. Otherwise it stores 0. * * A page is added to the youngest generation on faulting. The aging needs to * check the accessed bit at least twice before handing this page over to the @@ -324,8 +324,8 @@ enum lruvec_flags { * rest of generations, if they exist, are considered inactive. See * lru_gen_is_active(). * - * PG_active is always cleared while a page is on one of lrugen->lists[] so that - * the aging needs not to worry about it. And it's set again when a page + * PG_active is always cleared while a page is on one of lrugen->folios[] so + * that the aging needs not to worry about it. And it's set again when a page * considered active is isolated for non-reclaiming purposes, e.g., migration. * See lru_gen_add_folio() and lru_gen_del_folio(). * @@ -412,7 +412,7 @@ struct lru_gen_folio { /* the birth time of each generation in jiffies */ unsigned long timestamps[MAX_NR_GENS]; /* the multi-gen LRU lists, lazily sorted on eviction */ - struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the multi-gen LRU sizes, eventually consistent */ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the exponential moving average of refaulted */ -- cgit v1.2.3 From e4dde56cd208674ce899b47589f263499e5b8cdc Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:04 -0700 Subject: mm: multi-gen LRU: per-node lru_gen_folio lists For each node, memcgs are divided into two generations: the old and the young. For each generation, memcgs are randomly sharded into multiple bins to improve scalability. For each bin, an RCU hlist_nulls is virtually divided into three segments: the head, the tail and the default. An onlining memcg is added to the tail of a random bin in the old generation. The eviction starts at the head of a random bin in the old generation. The per-node memcg generation counter, whose reminder (mod 2) indexes the old generation, is incremented when all its bins become empty. There are four operations: 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its current generation (old or young) and updates its "seg" to "head"; 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its current generation (old or young) and updates its "seg" to "tail"; 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old generation, updates its "gen" to "old" and resets its "seg" to "default"; 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the young generation, updates its "gen" to "young" and resets its "seg" to "default". The events that trigger the above operations are: 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; 2. The first attempt to reclaim an memcg below low, which triggers MEMCG_LRU_TAIL; 3. The first attempt to reclaim an memcg below reclaimable size threshold, which triggers MEMCG_LRU_TAIL; 4. The second attempt to reclaim an memcg below reclaimable size threshold, which triggers MEMCG_LRU_YOUNG; 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. Note that memcg LRU only applies to global reclaim, and the round-robin incrementing of their max_seq counters ensures the eventual fairness to all eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). Link: https://lkml.kernel.org/r/20221222041905.2431096-7-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 10 ++++ include/linux/mm_inline.h | 17 +++++++ include/linux/mmzone.h | 117 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d3c8203cab6c..2e08b05bc6bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -794,6 +794,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg) percpu_ref_put(&objcg->refcnt); } +static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) +{ + return !memcg || css_tryget(&memcg->css); +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) @@ -1301,6 +1306,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg) { } +static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) +{ + return true; +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index eb8a2435ee80..acf03147fff8 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void) return current->in_lru_fault; } +#ifdef CONFIG_MEMCG +static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return READ_ONCE(lruvec->lrugen.seg); +} +#else +static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return 0; +} +#endif + static inline int lru_gen_from_seq(unsigned long seq) { return seq % MAX_NR_GENS; @@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void) return false; } +static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return 0; +} + static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c96ee823dbd..815c7c2edf45 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -367,6 +368,15 @@ struct page_vma_mapped_walk; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) +/* see the comment on MEMCG_NR_GENS */ +enum { + MEMCG_LRU_NOP, + MEMCG_LRU_HEAD, + MEMCG_LRU_TAIL, + MEMCG_LRU_OLD, + MEMCG_LRU_YOUNG, +}; + #ifdef CONFIG_LRU_GEN enum { @@ -426,6 +436,14 @@ struct lru_gen_folio { atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; +#ifdef CONFIG_MEMCG + /* the memcg generation this lru_gen_folio belongs to */ + u8 gen; + /* the list segment this lru_gen_folio belongs to */ + u8 seg; + /* per-node lru_gen_folio list for global reclaim */ + struct hlist_nulls_node list; +#endif }; enum { @@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec); void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); #ifdef CONFIG_MEMCG + +/* + * For each node, memcgs are divided into two generations: the old and the + * young. For each generation, memcgs are randomly sharded into multiple bins + * to improve scalability. For each bin, the hlist_nulls is virtually divided + * into three segments: the head, the tail and the default. + * + * An onlining memcg is added to the tail of a random bin in the old generation. + * The eviction starts at the head of a random bin in the old generation. The + * per-node memcg generation counter, whose reminder (mod MEMCG_NR_GENS) indexes + * the old generation, is incremented when all its bins become empty. + * + * There are four operations: + * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * current generation (old or young) and updates its "seg" to "head"; + * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * current generation (old or young) and updates its "seg" to "tail"; + * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * generation, updates its "gen" to "old" and resets its "seg" to "default"; + * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * young generation, updates its "gen" to "young" and resets its "seg" to + * "default". + * + * The events that trigger the above operations are: + * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; + * 2. The first attempt to reclaim an memcg below low, which triggers + * MEMCG_LRU_TAIL; + * 3. The first attempt to reclaim an memcg below reclaimable size threshold, + * which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim an memcg below reclaimable size threshold, + * which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; + * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * + * Note that memcg LRU only applies to global reclaim, and the round-robin + * incrementing of their max_seq counters ensures the eventual fairness to all + * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + */ +#define MEMCG_NR_GENS 2 +#define MEMCG_NR_BINS 8 + +struct lru_gen_memcg { + /* the per-node memcg generation counter */ + unsigned long seq; + /* each memcg has one lru_gen_folio per node */ + unsigned long nr_memcgs[MEMCG_NR_GENS]; + /* per-node lru_gen_folio list for global reclaim */ + struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS]; + /* protects the above */ + spinlock_t lock; +}; + +void lru_gen_init_pgdat(struct pglist_data *pgdat); + void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); -#endif +void lru_gen_online_memcg(struct mem_cgroup *memcg); +void lru_gen_offline_memcg(struct mem_cgroup *memcg); +void lru_gen_release_memcg(struct mem_cgroup *memcg); +void lru_gen_rotate_memcg(struct lruvec *lruvec, int op); + +#else /* !CONFIG_MEMCG */ + +#define MEMCG_NR_GENS 1 + +struct lru_gen_memcg { +}; + +static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) +{ +} + +#endif /* CONFIG_MEMCG */ #else /* !CONFIG_LRU_GEN */ +static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) +{ +} + static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } @@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) } #ifdef CONFIG_MEMCG + static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } @@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } -#endif + +static inline void lru_gen_online_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) +{ +} + +#endif /* CONFIG_MEMCG */ #endif /* CONFIG_LRU_GEN */ @@ -1243,6 +1354,8 @@ typedef struct pglist_data { #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ struct lru_gen_mm_walk mm_walk; + /* lru_gen_folio list */ + struct lru_gen_memcg memcg_lru; #endif CACHELINE_PADDING(_pad2_); -- cgit v1.2.3 From 1ef488edd6c4d447784710974f049628c2890481 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Dec 2022 16:56:16 +0100 Subject: mm/mprotect: drop pgprot_t parameter from change_protection() Being able to provide a custom protection opens the door for inconsistencies and BUGs: for example, accidentally allowing for more permissions than desired by other mechanisms (e.g., softdirty tracking). vma->vm_page_prot should be the single source of truth. Only PROT_NUMA is special: there is no way we can erroneously allow for more permissions when removing all permissions. Special-case using the MM_CP_PROT_NUMA flag. [david@redhat.com: PAGE_NONE might not be defined without CONFIG_NUMA_BALANCING] Link: https://lkml.kernel.org/r/5084ff1c-ebb3-f918-6a60-bacabf550a88@redhat.com Link: https://lkml.kernel.org/r/20221223155616.297723-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Nadav Amit Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d68579bf8484..329ed67edd76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2134,8 +2134,7 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); extern unsigned long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgprot_t newprot, - unsigned long cp_flags); + unsigned long end, unsigned long cp_flags); extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); -- cgit v1.2.3 From 3783e1721b650588938d28e4a084a1c9748361c8 Mon Sep 17 00:00:00 2001 From: Kele Huang Date: Sat, 24 Dec 2022 01:02:33 -0500 Subject: mm: fix comment of page table counter Commit af5b0f6a09e42 ("mm: consolidate page table accounting") consolidates page table accounting to a single counter in struct mm_struct {} as mm->pgtables_bytes. So the meanning of this counter should be the size of all page tables now. Link: https://lkml.kernel.org/r/20221224060233.417827-1-kele.huang@columbia.edu Signed-off-by: Kele Huang Cc: Arnd Bergmann Cc: Colin Cross Cc: David Hildenbrand Cc: Hugh Dickins Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Pasha Tatashin Cc: Peter Xu Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1118e381fcdc..10b6eb311ede 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -647,7 +647,7 @@ struct mm_struct { atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* size of all page tables */ #endif int map_count; /* number of VMAs */ -- cgit v1.2.3 From cff61bbc717bfddd6e433fe142b8e70b21546a1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Dec 2022 06:10:29 -1000 Subject: jbd2,ocfs2: move jbd2_journal_submit_inode_data_buffers to ocfs2 jbd2_journal_submit_inode_data_buffers is only used by ocfs2, so move it there to prepare for removing generic_writepages. Link: https://lkml.kernel.org/r/20221229161031.391878-5-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Joel Becker Cc: Joseph Qi Cc: Konstantin Komarov Cc: Mark Fasheh Cc: Matthew Wilcox Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- include/linux/jbd2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2170e0cc279d..5962072a4b19 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1570,8 +1570,6 @@ extern int jbd2_journal_inode_ranged_write(handle_t *handle, extern int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); -extern int jbd2_journal_submit_inode_data_buffers( - struct jbd2_inode *jinode); extern int jbd2_journal_finish_inode_data_buffers( struct jbd2_inode *jinode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, -- cgit v1.2.3 From c2ca7a59a4199059556b57cfdf98fcf46039ca6b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Dec 2022 06:10:31 -1000 Subject: mm: remove generic_writepages Now that all external callers are gone, just fold it into do_writepages. Link: https://lkml.kernel.org/r/20221229161031.391878-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Cc: Joel Becker Cc: Joseph Qi Cc: Konstantin Komarov Cc: Mark Fasheh Cc: Matthew Wilcox Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 06f9291b6fd5..2554b71765e9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -369,8 +369,6 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); -int generic_writepages(struct address_space *mapping, - struct writeback_control *wbc); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, -- cgit v1.2.3 From becacb04fdd439d7d1f2a93739161706a2e3e947 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 30 Dec 2022 15:08:42 +0800 Subject: mm: memcg: add folio_memcg_check() Patch series "mm: convert page_idle/damon to use folios", v4. This patch (of 8): Convert page_memcg_check() into folio_memcg_check() and add a page_memcg_check() wrapper. The behaviour of page_memcg_check() is unchanged; tail pages always had a NULL ->memcg_data. Link: https://lkml.kernel.org/r/20221230070849.63358-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20221230070849.63358-2-wangkefeng.wang@huawei.com Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Kefeng Wang Cc: David Hildenbrand Cc: SeongJae Park Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2e08b05bc6bf..26667bf16da5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -466,34 +466,34 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) } /* - * page_memcg_check - get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg_check - Get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function unlike page_memcg() can take any page - * as an argument. It has to be used in cases when it's not known if a page + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function unlike folio_memcg() can take any folio + * as an argument. It has to be used in cases when it's not known if a folio * has an associated memory cgroup pointer or an object cgroups vector or * an object cgroup. * - * For a non-kmem page any of the following ensures page and memcg binding + * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * - * - the page lock + * - the folio lock * - LRU isolation - * - lock_page_memcg() + * - lock_folio_memcg() * - exclusive reference * - mem_cgroup_trylock_pages() * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. + * For a kmem folio a caller should hold an rcu read lock to protect memcg + * associated with a kmem folio from being released. */ -static inline struct mem_cgroup *page_memcg_check(struct page *page) +static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) { /* - * Because page->memcg_data might be changed asynchronously - * for slab pages, READ_ONCE() should be used here. + * Because folio->memcg_data might be changed asynchronously + * for slabs, READ_ONCE() should be used here. */ - unsigned long memcg_data = READ_ONCE(page->memcg_data); + unsigned long memcg_data = READ_ONCE(folio->memcg_data); if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; @@ -508,6 +508,13 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + if (PageTail(page)) + return NULL; + return folio_memcg_check((struct folio *)page); +} + static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) { struct mem_cgroup *memcg; @@ -1170,6 +1177,11 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) return NULL; } +static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *page_memcg_check(struct page *page) { return NULL; -- cgit v1.2.3 From a79390f5d6a78647fd70856bd42b22d994de0ba2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:06 -0500 Subject: mm/mprotect: use long for page accountings and retval Switch to use type "long" for page accountings and retval across the whole procedure of change_protection(). The change should have shrinked the possible maximum page number to be half comparing to previous (ULONG_MAX / 2), but it shouldn't overflow on any system either because the maximum possible pages touched by change protection should be ULONG_MAX / PAGE_SIZE. Two reasons to switch from "unsigned long" to "long": 1. It suites better on count_vm_numa_events(), whose 2nd parameter takes a long type. 2. It paves way for returning negative (error) values in the future. Currently the only caller that consumes this retval is change_prot_numa(), where the unsigned long was converted to an int. Since at it, touching up the numa code to also take a long, so it'll avoid any possible overflow too during the int-size convertion. Link: https://lkml.kernel.org/r/20230104225207.1066932-3-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Mike Kravetz Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- include/linux/mm.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b6b10101bea7..e3aa336df900 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -248,7 +248,7 @@ void hugetlb_vma_lock_release(struct kref *kref); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pud); -unsigned long hugetlb_change_protection(struct vm_area_struct *vma, +long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); @@ -437,7 +437,7 @@ static inline void move_hugetlb_state(struct folio *old_folio, { } -static inline unsigned long hugetlb_change_protection( +static inline long hugetlb_change_protection( struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags) diff --git a/include/linux/mm.h b/include/linux/mm.h index 329ed67edd76..4ac5ea4b584c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2132,7 +2132,7 @@ static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma } bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); -extern unsigned long change_protection(struct mmu_gather *tlb, +extern long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags); extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, -- cgit v1.2.3 From d1751118c88673fe5a948ad82277898e9e284c55 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:07 -0500 Subject: mm/uffd: detect pgtable allocation failures Before this patch, when there's any pgtable allocation issues happened during change_protection(), the error will be ignored from the syscall. For shmem, there will be an error dumped into the host dmesg. Two issues with that: (1) Doing a trace dump when allocation fails is not anything close to grace. (2) The user should be notified with any kind of such error, so the user can trap it and decide what to do next, either by retrying, or stop the process properly, or anything else. For userfault users, this will change the API of UFFDIO_WRITEPROTECT when pgtable allocation failure happened. It should not normally break anyone, though. If it breaks, then in good ways. One man-page update will be on the way to introduce the new -ENOMEM for UFFDIO_WRITEPROTECT. Not marking stable so we keep the old behavior on the 5.19-till-now kernels. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20230104225207.1066932-4-peterx@redhat.com Signed-off-by: Peter Xu Reported-by: James Houghton Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 9df0b9a762cc..3767f18114ef 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -73,7 +73,7 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); -extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, +extern long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); /* mm helpers */ -- cgit v1.2.3 From 9eefefd835e451d340f5e95bc14ffd68b9b99268 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 5 Jan 2023 13:04:24 +0100 Subject: mm: remove an ambiguous sentence from kmap_local_folio() kdocs In the kdocs of kmap_local_folio() there is a an ambiguous sentence which suggests to use this API "only when really necessary". On the contrary, since kmap() and kmap_atomic() are deprecated, both kmap_local_folio(), as well as kmap_local_page(), must be preferred to the previous ones. Therefore, remove the above-mentioned sentence exactly how it has previously been done for the kmap_local_page() kdocs in commit 72f1c55adf70 ("highmem: delete a sentence from kmap_local_page() kdocs"). Link: https://lkml.kernel.org/r/20230105120424.30055-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Reviewed-by: Ira Weiny Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/highmem.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 44242268f53b..daeb0d8e753a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -119,9 +119,8 @@ static inline void *kmap_local_page(struct page *page); * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() for the higmem case it - * comes with restrictions about the pointer validity. Only use when really - * necessary. + * While it is significantly faster than kmap() for the highmem case it + * comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across -- cgit v1.2.3 From 1f8549fce525bc95df40ea3ddbfc6e8e719d188d Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 5 Jan 2023 13:13:05 +0100 Subject: mm: fix spelling mistake in highmem.h Substitute "higmem" with "highmem" in highmem.h. Link: https://lkml.kernel.org/r/20230105121305.30714-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: "Matthew Wilcox (Oracle)" Reviewed-by: Ira Weiny Signed-off-by: Andrew Morton --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index daeb0d8e753a..d7097b8158f2 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -86,8 +86,8 @@ static inline void kmap_flush_unused(void); * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() for the higmem case it - * comes with restrictions about the pointer validity. + * While kmap_local_page() is significantly faster than kmap() for the highmem + * case it comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across -- cgit v1.2.3 From bbc61844b4645d54c147a82654ac974bb7be85de Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 4 Jan 2023 14:06:05 +0800 Subject: mm/kasan: simplify and refine kasan_cache code struct 'kasan_cache' has a member 'is_kmalloc' indicating whether its host kmem_cache is a kmalloc cache. With newly introduced is_kmalloc_cache() helper, 'is_kmalloc' and its related function can be replaced and removed. Also 'kasan_cache' is only needed by KASAN generic mode, and not by SW/HW tag modes, so refine its protection macro accordingly, suggested by Andrey Konoval. Link: https://lkml.kernel.org/r/20230104060605.930910-2-feng.tang@intel.com Signed-off-by: Feng Tang Reviewed-by: Andrey Konovalov Acked-by: Vlastimil Babka Acked-by: David Rientjes Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Christoph Lameter Cc: Dmitry Vyukov Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- include/linux/kasan.h | 22 +++++----------------- include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- 3 files changed, 7 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5ebbaf672009..f7ef70661ce2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -96,15 +96,6 @@ static inline bool kasan_has_integrated_init(void) } #ifdef CONFIG_KASAN - -struct kasan_cache { -#ifdef CONFIG_KASAN_GENERIC - int alloc_meta_offset; - int free_meta_offset; -#endif - bool is_kmalloc; -}; - void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) { @@ -129,13 +120,6 @@ static __always_inline bool kasan_unpoison_pages(struct page *page, return false; } -void __kasan_cache_create_kmalloc(struct kmem_cache *cache); -static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) -{ - if (kasan_enabled()) - __kasan_cache_create_kmalloc(cache); -} - void __kasan_poison_slab(struct slab *slab); static __always_inline void kasan_poison_slab(struct slab *slab) { @@ -255,7 +239,6 @@ static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, { return false; } -static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -306,6 +289,11 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} #ifdef CONFIG_KASAN_GENERIC +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; + size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5834bad8ad78..a61e7d55d0d3 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -72,7 +72,7 @@ struct kmem_cache { int obj_offset; #endif /* CONFIG_DEBUG_SLAB */ -#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_GENERIC struct kasan_cache kasan_info; #endif diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index aa0ee1678d29..f6df03f934e5 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -136,7 +136,7 @@ struct kmem_cache { unsigned int *random_seq; #endif -#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_GENERIC struct kasan_cache kasan_info; #endif -- cgit v1.2.3 From e9adcfecf572fcfaa9f8525904cf49c709974f73 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 3 Jan 2023 16:27:32 -0800 Subject: mm: remove zap_page_range and create zap_vma_pages zap_page_range was originally designed to unmap pages within an address range that could span multiple vmas. While working on [1], it was discovered that all callers of zap_page_range pass a range entirely within a single vma. In addition, the mmu notification call within zap_page range does not correctly handle ranges that span multiple vmas. When crossing a vma boundary, a new mmu_notifier_range_init/end call pair with the new vma should be made. Instead of fixing zap_page_range, do the following: - Create a new routine zap_vma_pages() that will remove all pages within the passed vma. Most users of zap_page_range pass the entire vma and can use this new routine. - For callers of zap_page_range not passing the entire vma, instead call zap_page_range_single(). - Remove zap_page_range. [1] https://lore.kernel.org/linux-mm/20221114235507.294320-2-mike.kravetz@oracle.com/ Link: https://lkml.kernel.org/r/20230104002732.232573-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: Peter Xu Acked-by: Michal Hocko Acked-by: Peter Xu Acked-by: Heiko Carstens [s390] Reviewed-by: Christoph Hellwig Cc: Christian Borntraeger Cc: Christian Brauner Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Dumazet Cc: Matthew Wilcox Cc: Michael Ellerman Cc: Nadav Amit Cc: Palmer Dabbelt Cc: Rik van Riel Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4ac5ea4b584c..eb5bfc77c2c2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1977,10 +1977,13 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); -void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size); void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details); +static inline void zap_vma_pages(struct vm_area_struct *vma) +{ + zap_page_range_single(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); +} void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From fc4f4be9b5271e43eeb4c675d190fa9734de9ea3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 2 Jan 2023 17:08:54 +0100 Subject: mm/nommu: factor out check for NOMMU shared mappings into is_nommu_shared_mapping() Patch series "mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings". Trying to reduce the confusion around VM_SHARED and VM_MAYSHARE first requires !CONFIG_MMU to stop using VM_MAYSHARE for MAP_PRIVATE mappings. CONFIG_MMU only sets VM_MAYSHARE for MAP_SHARED mappings. This paves the way for further VM_MAYSHARE and VM_SHARED cleanups: for example, renaming VM_MAYSHARED to VM_MAP_SHARED to make it cleaner what is actually means. Let's first get the weird case out of the way and not use VM_MAYSHARE in MAP_PRIVATE mappings, using a new VM_MAYOVERLAY flag instead. This patch (of 3): We want to stop using VM_MAYSHARE in private mappings to pave the way for clarifying the semantics of VM_MAYSHARE vs. VM_SHARED and reduce the confusion. While CONFIG_MMU uses VM_MAYSHARE to represent MAP_SHARED, !CONFIG_MMU also sets VM_MAYSHARE for selected R/O private file mappings that are an effective overlay of a file mapping. Let's factor out all relevant VM_MAYSHARE checks in !CONFIG_MMU code into is_nommu_shared_mapping() first. Note that whenever VM_SHARED is set, VM_MAYSHARE must be set as well (unless there is a serious BUG). So there is not need to test for VM_SHARED manually. No functional change intended. Link: https://lkml.kernel.org/r/20230102160856.500584-1-david@redhat.com Link: https://lkml.kernel.org/r/20230102160856.500584-2-david@redhat.com Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: Nicolas Pitre Cc: Pavel Begunkov Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb5bfc77c2c2..791bac40bf8e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1347,6 +1347,21 @@ static inline bool is_cow_mapping(vm_flags_t flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } +#ifndef CONFIG_MMU +static inline bool is_nommu_shared_mapping(vm_flags_t flags) +{ + /* + * NOMMU shared mappings are ordinary MAP_SHARED mappings and selected + * R/O MAP_PRIVATE file mappings that are an effective R/O overlay of + * a file mapping. R/O MAP_PRIVATE mappings might still modify + * underlying memory if ptrace is active, so this is only possible if + * ptrace does not apply. Note that there is no mprotect() to upgrade + * write permissions later. + */ + return flags & VM_MAYSHARE; +} +#endif + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif -- cgit v1.2.3 From b6b7a8faf05c709cd9f63d3b7d9c66bd91bc3b0d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 2 Jan 2023 17:08:55 +0100 Subject: mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings Let's stop using VM_MAYSHARE for MAP_PRIVATE mappings and use VM_MAYOVERLAY instead. Rewrite determine_vm_flags() to make the whole logic easier to digest, and to cleanly separate MAP_PRIVATE vs. MAP_SHARED. No functional change intended. Link: https://lkml.kernel.org/r/20230102160856.500584-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: Nicolas Pitre Cc: Pavel Begunkov Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 791bac40bf8e..8a8563359946 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -276,7 +276,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#ifdef CONFIG_MMU #define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */ +#else /* CONFIG_MMU */ +#define VM_MAYOVERLAY 0x00000200 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ +#define VM_UFFD_MISSING 0 +#endif /* CONFIG_MMU */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ @@ -1358,7 +1363,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) * ptrace does not apply. Note that there is no mprotect() to upgrade * write permissions later. */ - return flags & VM_MAYSHARE; + return flags & (VM_MAYSHARE | VM_MAYOVERLAY); } #endif -- cgit v1.2.3 From 8788f6781486769d9598dcaedc3fe0eb12fc3e59 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Dec 2022 14:52:51 -0700 Subject: mm: add vma_has_recency() Add vma_has_recency() to indicate whether a VMA may exhibit temporal locality that the LRU algorithm relies on. This function returns false for VMAs marked by VM_SEQ_READ or VM_RAND_READ. While the former flag indicates linear access, i.e., a special case of spatial locality, both flags indicate a lack of temporal locality, i.e., the reuse of an area within a relatively small duration. "Recency" is chosen over "locality" to avoid confusion between temporal and spatial localities. Before this patch, the active/inactive LRU only ignored the accessed bit from VMAs marked by VM_SEQ_READ. After this patch, the active/inactive LRU and MGLRU share the same logic: they both ignore the accessed bit if vma_has_recency() returns false. For the active/inactive LRU, the following fio test showed a [6, 8]% increase in IOPS when randomly accessing mapped files under memory pressure. kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo) kb=$((kb - 8*1024*1024)) modprobe brd rd_nr=1 rd_size=$kb dd if=/dev/zero of=/dev/ram0 bs=1M mkfs.ext4 /dev/ram0 mount /dev/ram0 /mnt/ swapoff -a fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \ --size=8G --rw=randrw --time_based --runtime=10m \ --group_reporting The discussion that led to this patch is here [1]. Additional test results are available in that thread. [1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/ Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Alexander Viro Cc: Andrea Righi Cc: Johannes Weiner Cc: Michael Larabel Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index acf03147fff8..4abebf2615a3 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -594,4 +594,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, #endif } +static inline bool vma_has_recency(struct vm_area_struct *vma) +{ + if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) + return false; + + return true; +} + #endif -- cgit v1.2.3 From 17e810229cb3068b692fa078bd9b3a6527e0866a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Dec 2022 14:52:52 -0700 Subject: mm: support POSIX_FADV_NOREUSE This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU algorithm can ignore access to mapped files marked by this flag. The advantages of POSIX_FADV_NOREUSE are: 1. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not alter the default readahead behavior. 2. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not split VMAs and therefore does not take mmap_lock. 3. Unlike MADV_COLD, setting it has a negligible cost, regardless of how many pages it affects. Its limitations are: 1. Like POSIX_FADV_RANDOM and POSIX_FADV_SEQUENTIAL, it currently does not support range. IOW, its scope is the entire file. 2. It currently does not ignore access through file descriptors. Specifically, for the active/inactive LRU, given a file page shared by two users and one of them having set POSIX_FADV_NOREUSE on the file, this page will be activated upon the second user accessing it. This corner case can be covered by checking POSIX_FADV_NOREUSE before calling folio_mark_accessed() on the read path. But it is considered not worth the effort. There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1]. This time the goal is to fill a niche: a few desktop applications, e.g., large file transferring and video encoding/decoding, want fast file streaming with mmap() rather than direct IO. Among those applications, an SVT-AV1 regression was reported when running with MGLRU [2]. The following test can reproduce that regression. kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo) kb=$((kb - 8*1024*1024)) modprobe brd rd_nr=1 rd_size=$kb dd if=/dev/zero of=/dev/ram0 bs=1M mkfs.ext4 /dev/ram0 mount /dev/ram0 /mnt/ swapoff -a fallocate -l 8G /mnt/swapfile mkswap /mnt/swapfile swapon /mnt/swapfile wget http://ultravideo.cs.tut.fi/video/Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z 7z e -o/mnt/ Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z SvtAv1EncApp --preset 12 -w 3840 -h 2160 \ -i /mnt/Bosphorus_3840x2160.y4m For MGLRU, the following change showed a [9-11]% increase in FPS, which makes it on par with the active/inactive LRU. patch Source/App/EncApp/EbAppMain.c < #include 35d35 < #include /* _O_BINARY */ 117a118 > posix_fadvise(config->mmap.fd, 0, 0, POSIX_FADV_NOREUSE); EOF [1] https://lore.kernel.org/r/1308923350-7932-1-git-send-email-andrea@betterlinux.com/ [2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57 Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Alexander Viro Cc: Andrea Righi Cc: Johannes Weiner Cc: Michael Larabel Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 ++ include/linux/mm_inline.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..d353c262d669 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports DIRECT IO */ #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) +#define FMODE_NOREUSE ((__force fmode_t)0x800000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 4abebf2615a3..26dcbda07e92 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -599,6 +599,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma) if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false; + if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE)) + return false; + return true; } -- cgit v1.2.3 From 02d65d6fb1aae151570c8bfd1bd77a8153d2e607 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 6 Jan 2023 15:52:51 -0600 Subject: mm: introduce folio_is_pfmemalloc Add a folio equivalent for page_is_pfmemalloc. This removes two instances of page_is_pfmemalloc(folio_page(folio, 0)) so the folio can be used directly. Link: https://lkml.kernel.org/r/20230106215251.599222-1-sidhartha.kumar@oracle.com Suggested-by: Matthew Wilcox Signed-off-by: Sidhartha Kumar Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: SeongJae Park Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a8563359946..76c97cb8ee9a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1926,6 +1926,21 @@ static inline bool page_is_pfmemalloc(const struct page *page) return (uintptr_t)page->lru.next & BIT(1); } +/* + * Return true only if the folio has been allocated with + * ALLOC_NO_WATERMARKS and the low watermark was not + * met implying that the system is under some pressure. + */ +static inline bool folio_is_pfmemalloc(const struct folio *folio) +{ + /* + * lru.next has bit 1 set if the page is allocated from the + * pfmemalloc reserves. Callers may simply overwrite it if + * they do not need to preserve that information. + */ + return (uintptr_t)folio->lru.next & BIT(1); +} + /* * Only to be called by the page allocator on a freshly allocated * page. -- cgit v1.2.3 From c1632a0f11209338fc300c66252bcc4686e609e8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:11 +0100 Subject: fs: port ->setattr() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/evm.h | 4 ++-- include/linux/fs.h | 9 ++++----- include/linux/nfs_fs.h | 2 +- include/linux/security.h | 4 ++-- 4 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/evm.h b/include/linux/evm.h index 7a9ee2157f69..1f8f806dd0d1 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -21,7 +21,7 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry, void *xattr_value, size_t xattr_value_len, struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct user_namespace *mnt_userns, +extern int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); extern int evm_inode_setxattr(struct user_namespace *mnt_userns, @@ -90,7 +90,7 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, } #endif -static inline int evm_inode_setattr(struct user_namespace *mnt_userns, +static inline int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7aa302d2ce39..24e378e2835f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2152,8 +2152,7 @@ struct inode_operations { umode_t,dev_t); int (*rename) (struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); - int (*setattr) (struct user_namespace *, struct dentry *, - struct iattr *); + int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *); int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); @@ -3313,7 +3312,7 @@ extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); -extern int simple_setattr(struct user_namespace *, struct dentry *, +extern int simple_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern int simple_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); @@ -3368,9 +3367,9 @@ extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, unsigned int ia_valid); -int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *); +int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); -void setattr_copy(struct user_namespace *, struct inode *inode, +void setattr_copy(struct mnt_idmap *, struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d92fdfd2444c..7c9628dc61a3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -405,7 +405,7 @@ extern int nfs_clear_invalid_mapping(struct address_space *mapping); extern bool nfs_mapping_need_revalidate_inode(struct inode *inode); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_revalidate_mapping_rcu(struct inode *inode); -extern int nfs_setattr(struct user_namespace *, struct dentry *, struct iattr *); +extern int nfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); diff --git a/include/linux/security.h b/include/linux/security.h index 5b67f208f7de..1ba1f4e70b50 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -356,7 +356,7 @@ int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct inode *inode, bool rcu); int security_inode_permission(struct inode *inode, int mask); -int security_inode_setattr(struct user_namespace *mnt_userns, +int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct user_namespace *mnt_userns, @@ -862,7 +862,7 @@ static inline int security_inode_permission(struct inode *inode, int mask) return 0; } -static inline int security_inode_setattr(struct user_namespace *mnt_userns, +static inline int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { -- cgit v1.2.3 From b74d24f7a74ffd2d42ca883d84b7422b8d545901 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:12 +0100 Subject: fs: port ->getattr() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 6 +++--- include/linux/nfs_fs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 24e378e2835f..0214aee3324e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2153,7 +2153,7 @@ struct inode_operations { int (*rename) (struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *); - int (*getattr) (struct user_namespace *, const struct path *, + int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, @@ -3261,7 +3261,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *); +void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -3314,7 +3314,7 @@ extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); -extern int simple_getattr(struct user_namespace *, const struct path *, +extern int simple_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7c9628dc61a3..0cd89ebd4bb6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -392,7 +392,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct user_namespace *, const struct path *, +extern int nfs_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); -- cgit v1.2.3 From 6c960e68aaed335a0040f16654f3c5e5bfcf9249 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:13 +0100 Subject: fs: port ->create() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0214aee3324e..fddfacf2583a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2139,7 +2139,7 @@ struct inode_operations { int (*readlink) (struct dentry *, char __user *,int); - int (*create) (struct user_namespace *, struct inode *,struct dentry *, + int (*create) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); -- cgit v1.2.3 From 7a77db95511c39be4b2db2ceca152ef589adc2dc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:14 +0100 Subject: fs: port ->symlink() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fddfacf2583a..4bde68e15d5c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2143,7 +2143,7 @@ struct inode_operations { umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); - int (*symlink) (struct user_namespace *, struct inode *,struct dentry *, + int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *, umode_t); -- cgit v1.2.3 From c54bd91e9eaba43f09aadc25b52ea869ff3b5587 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:15 +0100 Subject: fs: port ->mkdir() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4bde68e15d5c..f6b1f0ca261a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2145,7 +2145,7 @@ struct inode_operations { int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); - int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *, + int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct user_namespace *, struct inode *,struct dentry *, -- cgit v1.2.3 From 5ebb29bee8d5fc173b774e0755be8cb335503ee3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:16 +0100 Subject: fs: port ->mknod() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f6b1f0ca261a..a28117398e71 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2148,7 +2148,7 @@ struct inode_operations { int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); - int (*mknod) (struct user_namespace *, struct inode *,struct dentry *, + int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); int (*rename) (struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); -- cgit v1.2.3 From e18275ae55e07a2937e48134589c2f4c1d99a369 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:17 +0100 Subject: fs: port ->rename() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a28117398e71..8d287bd2bf9b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2150,7 +2150,7 @@ struct inode_operations { int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); - int (*rename) (struct user_namespace *, struct inode *, struct dentry *, + int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *); int (*getattr) (struct mnt_idmap *, const struct path *, @@ -3323,7 +3323,7 @@ extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); -extern int simple_rename(struct user_namespace *, struct inode *, +extern int simple_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern void simple_recursive_removal(struct dentry *, -- cgit v1.2.3 From 011e2b717b1b921d3706a9d48ff83a025563e826 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:18 +0100 Subject: fs: port ->tmpfile() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8d287bd2bf9b..4855fd071bf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2162,7 +2162,7 @@ struct inode_operations { int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); - int (*tmpfile) (struct user_namespace *, struct inode *, + int (*tmpfile) (struct mnt_idmap *, struct inode *, struct file *, umode_t); struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *, int); -- cgit v1.2.3 From 77435322777d8a8a08264a39111bef94e32b871b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:19 +0100 Subject: fs: port ->get_acl() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- include/linux/posix_acl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4855fd071bf8..31a714377ba2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2164,7 +2164,7 @@ struct inode_operations { umode_t create_mode); int (*tmpfile) (struct mnt_idmap *, struct inode *, struct file *, umode_t); - struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *, + struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *, int); int (*set_acl)(struct user_namespace *, struct dentry *, struct posix_acl *, int); diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index ee608d22ecb9..042ef62f9276 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -102,7 +102,7 @@ static inline void cache_no_acl(struct inode *inode) int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); -struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, +struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry, const char *acl_name); @@ -141,7 +141,7 @@ static inline int vfs_set_acl(struct user_namespace *mnt_userns, return -EOPNOTSUPP; } -static inline struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, +static inline struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { -- cgit v1.2.3 From 13e83a4923bea7c4f2f6714030cb7e56d20ef7e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:20 +0100 Subject: fs: port ->set_acl() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- include/linux/posix_acl.h | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 31a714377ba2..85d8e4bc7798 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2166,7 +2166,7 @@ struct inode_operations { struct file *, umode_t); struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *, int); - int (*set_acl)(struct user_namespace *, struct dentry *, + int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 042ef62f9276..0282758ba400 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -69,20 +69,20 @@ extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); -int set_posix_acl(struct user_namespace *, struct dentry *, int, +int set_posix_acl(struct mnt_idmap *, struct dentry *, int, struct posix_acl *); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); #ifdef CONFIG_FS_POSIX_ACL -int posix_acl_chmod(struct user_namespace *, struct dentry *, umode_t); +int posix_acl_chmod(struct mnt_idmap *, struct dentry *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *, struct posix_acl **); -int simple_set_acl(struct user_namespace *, struct dentry *, +int simple_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); @@ -100,14 +100,14 @@ static inline void cache_no_acl(struct inode *inode) inode->i_default_acl = NULL; } -int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); -int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); #else -static inline int posix_acl_chmod(struct user_namespace *mnt_userns, +static inline int posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) { return 0; @@ -134,7 +134,7 @@ static inline void forget_all_cached_acls(struct inode *inode) { } -static inline int vfs_set_acl(struct user_namespace *mnt_userns, +static inline int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct posix_acl *acl) { @@ -148,7 +148,7 @@ static inline struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, return ERR_PTR(-EOPNOTSUPP); } -static inline int vfs_remove_acl(struct user_namespace *mnt_userns, +static inline int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return -EOPNOTSUPP; -- cgit v1.2.3 From 8782a9aea3ab4d697ad67d1f8ebca38a4e1c24ab Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:21 +0100 Subject: fs: port ->fileattr_set() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fileattr.h | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index 9e37e063ac69..47c05a9851d0 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -53,7 +53,7 @@ static inline bool fileattr_has_fsx(const struct fileattr *fa) } int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, +int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); #endif /* _LINUX_FILEATTR_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 85d8e4bc7798..349f71650fa2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2168,7 +2168,7 @@ struct inode_operations { int); int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); - int (*fileattr_set)(struct user_namespace *mnt_userns, + int (*fileattr_set)(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); } ____cacheline_aligned; -- cgit v1.2.3 From 4609e1f18e19c3b302e1eb4858334bca1532f780 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:22 +0100 Subject: fs: port ->permission() to pass mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 23 +++++++++++++---------- include/linux/lsm_hook_defs.h | 2 +- include/linux/namei.h | 6 +++--- include/linux/nfs_fs.h | 2 +- include/linux/security.h | 8 ++++---- include/linux/xattr.h | 12 ++++++------ 6 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 349f71650fa2..635ce7a7740f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1766,18 +1766,19 @@ static inline void inode_fsgid_set(struct inode *inode, /** * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped * @sb: the superblock we want a mapping in - * @mnt_userns: user namespace of the relevant mount + * @idmap: idmap of the relevant mount * * Check whether the caller's fsuid and fsgid have a valid mapping in the * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map - * the caller's fsuid and fsgid according to the @mnt_userns first. + * the caller's fsuid and fsgid according to the @idmap first. * * Return: true if fsuid and fsgid is mapped, false if not. */ static inline bool fsuidgid_has_mapping(struct super_block *sb, - struct user_namespace *mnt_userns) + struct mnt_idmap *idmap) { struct user_namespace *fs_userns = sb->s_user_ns; + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); kuid_t kuid; kgid_t kgid; @@ -2134,7 +2135,7 @@ struct file_operations { struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); - int (*permission) (struct user_namespace *, struct inode *, int); + int (*permission) (struct mnt_idmap *, struct inode *, int); struct posix_acl * (*get_inode_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); @@ -2322,9 +2323,11 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) -static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns, +static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); + return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) || !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)); } @@ -2902,16 +2905,16 @@ static inline int bmap(struct inode *inode, sector_t *block) int notify_change(struct mnt_idmap *, struct dentry *, struct iattr *, struct inode **); -int inode_permission(struct user_namespace *, struct inode *, int); -int generic_permission(struct user_namespace *, struct inode *, int); +int inode_permission(struct mnt_idmap *, struct inode *, int); +int generic_permission(struct mnt_idmap *, struct inode *, int); static inline int file_permission(struct file *file, int mask) { - return inode_permission(file_mnt_user_ns(file), + return inode_permission(file_mnt_idmap(file), file_inode(file), mask); } static inline int path_permission(const struct path *path, int mask) { - return inode_permission(mnt_user_ns(path->mnt), + return inode_permission(mnt_idmap(path->mnt), d_inode(path->dentry), mask); } int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir, @@ -3365,7 +3368,7 @@ extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); -int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, +int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ed6cb2ac55fa..894f233083e3 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -154,7 +154,7 @@ LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns, LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns, struct dentry *dentry) -LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct user_namespace *mnt_userns, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/include/linux/namei.h b/include/linux/namei.h index 00fee52df842..0d4531fd46e7 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -68,11 +68,11 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); -struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int); -struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, +struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); +struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len); -struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, +struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0cd89ebd4bb6..d6c119e31d7a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -396,7 +396,7 @@ extern int nfs_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); -extern int nfs_permission(struct user_namespace *, struct inode *, int); +extern int nfs_permission(struct mnt_idmap *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags); diff --git a/include/linux/security.h b/include/linux/security.h index 1ba1f4e70b50..d9cd7b2d16a2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -158,7 +158,7 @@ int cap_inode_removexattr(struct user_namespace *mnt_userns, int cap_inode_need_killpriv(struct dentry *dentry); int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry); -int cap_inode_getsecurity(struct user_namespace *mnt_userns, +int cap_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc); extern int cap_mmap_addr(unsigned long addr); @@ -378,7 +378,7 @@ int security_inode_removexattr(struct user_namespace *mnt_userns, int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry); -int security_inode_getsecurity(struct user_namespace *mnt_userns, +int security_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc); int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); @@ -936,12 +936,12 @@ static inline int security_inode_killpriv(struct user_namespace *mnt_userns, return cap_inode_killpriv(mnt_userns, dentry); } -static inline int security_inode_getsecurity(struct user_namespace *mnt_userns, +static inline int security_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) { - return cap_inode_getsecurity(mnt_userns, inode, name, buffer, alloc); + return cap_inode_getsecurity(idmap, inode, name, buffer, alloc); } static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 2e7dd44926e4..b39d156e0098 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -56,25 +56,25 @@ struct xattr { }; ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); -ssize_t vfs_getxattr(struct user_namespace *, struct dentry *, const char *, +ssize_t vfs_getxattr(struct mnt_idmap *, struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int __vfs_setxattr(struct user_namespace *, struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct user_namespace *, struct dentry *, const char *, const void *, size_t, int); -int __vfs_setxattr_locked(struct user_namespace *, struct dentry *, +int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, struct inode **); -int vfs_setxattr(struct user_namespace *, struct dentry *, const char *, +int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); -int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, +int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, const char *, struct inode **); -int vfs_removexattr(struct user_namespace *, struct dentry *, const char *); +int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -int vfs_getxattr_alloc(struct user_namespace *mnt_userns, +int vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); -- cgit v1.2.3 From 39f60c1ccee72caa0104145b5dbf5d37cce1ea39 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:23 +0100 Subject: fs: port xattr to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/capability.h | 5 +++-- include/linux/evm.h | 8 ++++---- include/linux/ima.h | 12 ++++++------ include/linux/lsm_hook_defs.h | 6 +++--- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 22 ++++++++++------------ include/linux/xattr.h | 8 ++++---- 7 files changed, 31 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 65efb74c3585..0a8ba82ef1af 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -42,6 +42,7 @@ struct inode; struct dentry; struct task_struct; struct user_namespace; +struct mnt_idmap; extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_init_eff_set; @@ -271,11 +272,11 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) } /* audit system wants to get cap info from files as well */ -int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, +int get_vfs_caps_from_disk(struct mnt_idmap *idmap, const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); -int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, +int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry, const void **ivalue, size_t size); #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/evm.h b/include/linux/evm.h index 1f8f806dd0d1..e06aacd3e315 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -24,14 +24,14 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry, extern int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); -extern int evm_inode_setxattr(struct user_namespace *mnt_userns, +extern int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size); extern void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len); -extern int evm_inode_removexattr(struct user_namespace *mnt_userns, +extern int evm_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name); extern void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name); @@ -101,7 +101,7 @@ static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid) return; } -static inline int evm_inode_setxattr(struct user_namespace *mnt_userns, +static inline int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size) { @@ -116,7 +116,7 @@ static inline void evm_inode_post_setxattr(struct dentry *dentry, return; } -static inline int evm_inode_removexattr(struct user_namespace *mnt_userns, +static inline int evm_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name) { diff --git a/include/linux/ima.h b/include/linux/ima.h index 5a0b2a285a18..6f470b658082 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -18,7 +18,7 @@ struct linux_binprm; extern enum hash_algo ima_get_current_hash_algo(void); extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask); -extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns, +extern void ima_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); @@ -30,7 +30,7 @@ extern int ima_read_file(struct file *file, enum kernel_read_file_id id, bool contents); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); -extern void ima_post_path_mknod(struct user_namespace *mnt_userns, +extern void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); @@ -66,7 +66,7 @@ static inline int ima_file_check(struct file *file, int mask) return 0; } -static inline void ima_post_create_tmpfile(struct user_namespace *mnt_userns, +static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) { } @@ -111,7 +111,7 @@ static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, return 0; } -static inline void ima_post_path_mknod(struct user_namespace *mnt_userns, +static inline void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { return; @@ -183,7 +183,7 @@ static inline void ima_post_key_create_or_update(struct key *keyring, #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); -extern void ima_inode_post_setattr(struct user_namespace *mnt_userns, +extern void ima_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len); @@ -203,7 +203,7 @@ static inline bool is_ima_appraise_enabled(void) return 0; } -static inline void ima_inode_post_setattr(struct user_namespace *mnt_userns, +static inline void ima_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry) { return; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 894f233083e3..f344c0e7387a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -136,14 +136,14 @@ LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) LSM_HOOK(int, 0, inode_getattr, const struct path *path) -LSM_HOOK(int, 0, inode_setxattr, struct user_namespace *mnt_userns, +LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) LSM_HOOK(void, LSM_RET_VOID, inode_post_setxattr, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) -LSM_HOOK(int, 0, inode_removexattr, struct user_namespace *mnt_userns, +LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_set_acl, struct user_namespace *mnt_userns, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) @@ -152,7 +152,7 @@ LSM_HOOK(int, 0, inode_get_acl, struct user_namespace *mnt_userns, LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) -LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns, +LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, struct dentry *dentry) LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0a5ba81f7367..6e156d2acffc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -475,7 +475,7 @@ * @inode_killpriv: * The setuid bit is being removed. Remove similar security labels. * Called with the dentry->d_inode->i_mutex held. - * @mnt_userns: user namespace of the mount. + * @idmap: idmap of the mount. * @dentry is the dentry being changed. * Return 0 on success. If error is returned, then the operation * causing setuid bit removal is failed. diff --git a/include/linux/security.h b/include/linux/security.h index d9cd7b2d16a2..474373e631df 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -153,11 +153,10 @@ extern int cap_capset(struct cred *new, const struct cred *old, extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -int cap_inode_removexattr(struct user_namespace *mnt_userns, +int cap_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); int cap_inode_need_killpriv(struct dentry *dentry); -int cap_inode_killpriv(struct user_namespace *mnt_userns, - struct dentry *dentry); +int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int cap_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc); @@ -359,7 +358,7 @@ int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); -int security_inode_setxattr(struct user_namespace *mnt_userns, +int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_set_acl(struct user_namespace *mnt_userns, @@ -373,11 +372,10 @@ void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); -int security_inode_removexattr(struct user_namespace *mnt_userns, +int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); -int security_inode_killpriv(struct user_namespace *mnt_userns, - struct dentry *dentry); +int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc); @@ -874,7 +872,7 @@ static inline int security_inode_getattr(const struct path *path) return 0; } -static inline int security_inode_setxattr(struct user_namespace *mnt_userns, +static inline int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { @@ -918,11 +916,11 @@ static inline int security_inode_listxattr(struct dentry *dentry) return 0; } -static inline int security_inode_removexattr(struct user_namespace *mnt_userns, +static inline int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { - return cap_inode_removexattr(mnt_userns, dentry, name); + return cap_inode_removexattr(idmap, dentry, name); } static inline int security_inode_need_killpriv(struct dentry *dentry) @@ -930,10 +928,10 @@ static inline int security_inode_need_killpriv(struct dentry *dentry) return cap_inode_need_killpriv(dentry); } -static inline int security_inode_killpriv(struct user_namespace *mnt_userns, +static inline int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry) { - return cap_inode_killpriv(mnt_userns, dentry); + return cap_inode_killpriv(idmap, dentry); } static inline int security_inode_getsecurity(struct mnt_idmap *idmap, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index b39d156e0098..6af72461397d 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -42,7 +42,7 @@ struct xattr_handler { struct inode *inode, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, - struct user_namespace *mnt_userns, struct dentry *dentry, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; @@ -59,16 +59,16 @@ ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, si ssize_t vfs_getxattr(struct mnt_idmap *, struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); -int __vfs_setxattr(struct user_namespace *, struct dentry *, struct inode *, +int __vfs_setxattr(struct mnt_idmap *, struct dentry *, struct inode *, const char *, const void *, size_t, int); -int __vfs_setxattr_noperm(struct user_namespace *, struct dentry *, +int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); -int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); +int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, const char *, struct inode **); int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); -- cgit v1.2.3 From 700b7940526d31117fd20b7ed31156df134fbe7f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:24 +0100 Subject: fs: port acl to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/evm.h | 14 +++++++------- include/linux/ima.h | 10 +++++----- include/linux/lsm_hook_defs.h | 6 +++--- include/linux/posix_acl.h | 4 ++-- include/linux/security.h | 12 ++++++------ 5 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/evm.h b/include/linux/evm.h index e06aacd3e315..7dc1ee74169f 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -35,20 +35,20 @@ extern int evm_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name); extern void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name); -static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns, +static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { evm_inode_post_removexattr(dentry, acl_name); } -extern int evm_inode_set_acl(struct user_namespace *mnt_userns, +extern int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); -static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns, +static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { - return evm_inode_set_acl(mnt_userns, dentry, acl_name, NULL); + return evm_inode_set_acl(idmap, dentry, acl_name, NULL); } static inline void evm_inode_post_set_acl(struct dentry *dentry, const char *acl_name, @@ -129,21 +129,21 @@ static inline void evm_inode_post_removexattr(struct dentry *dentry, return; } -static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns, +static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return; } -static inline int evm_inode_set_acl(struct user_namespace *mnt_userns, +static inline int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { return 0; } -static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns, +static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { diff --git a/include/linux/ima.h b/include/linux/ima.h index 6f470b658082..172b113a9864 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -187,14 +187,14 @@ extern void ima_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len); -extern int ima_inode_set_acl(struct user_namespace *mnt_userns, +extern int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); -static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns, +static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { - return ima_inode_set_acl(mnt_userns, dentry, acl_name, NULL); + return ima_inode_set_acl(idmap, dentry, acl_name, NULL); } extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else @@ -217,7 +217,7 @@ static inline int ima_inode_setxattr(struct dentry *dentry, return 0; } -static inline int ima_inode_set_acl(struct user_namespace *mnt_userns, +static inline int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { @@ -231,7 +231,7 @@ static inline int ima_inode_removexattr(struct dentry *dentry, return 0; } -static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns, +static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f344c0e7387a..094b76dc7164 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -145,11 +145,11 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) -LSM_HOOK(int, 0, inode_set_acl, struct user_namespace *mnt_userns, +LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) -LSM_HOOK(int, 0, inode_get_acl, struct user_namespace *mnt_userns, +LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) -LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns, +LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 0282758ba400..21cc29b8a9e8 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -79,7 +79,7 @@ struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); int posix_acl_chmod(struct mnt_idmap *, struct dentry *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); -int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *, +int posix_acl_update_mode(struct mnt_idmap *, struct inode *, umode_t *, struct posix_acl **); int simple_set_acl(struct mnt_idmap *, struct dentry *, @@ -91,7 +91,7 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode); int posix_acl_valid(struct user_namespace *, const struct posix_acl *); -int posix_acl_permission(struct user_namespace *, struct inode *, +int posix_acl_permission(struct mnt_idmap *, struct inode *, const struct posix_acl *, int); static inline void cache_no_acl(struct inode *inode) diff --git a/include/linux/security.h b/include/linux/security.h index 474373e631df..5984d0d550b4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -361,12 +361,12 @@ int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -int security_inode_set_acl(struct user_namespace *mnt_userns, +int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); -int security_inode_get_acl(struct user_namespace *mnt_userns, +int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); -int security_inode_remove_acl(struct user_namespace *mnt_userns, +int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); @@ -879,7 +879,7 @@ static inline int security_inode_setxattr(struct mnt_idmap *idmap, return cap_inode_setxattr(dentry, name, value, size, flags); } -static inline int security_inode_set_acl(struct user_namespace *mnt_userns, +static inline int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) @@ -887,14 +887,14 @@ static inline int security_inode_set_acl(struct user_namespace *mnt_userns, return 0; } -static inline int security_inode_get_acl(struct user_namespace *mnt_userns, +static inline int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return 0; } -static inline int security_inode_remove_acl(struct user_namespace *mnt_userns, +static inline int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { -- cgit v1.2.3 From f2d40141d5d90b882e2c35b226f9244a63b82b6e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:25 +0100 Subject: fs: port inode_init_owner() to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 635ce7a7740f..c1d698923d15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2014,7 +2014,7 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, /* * VFS file helper functions. */ -void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode, +void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); umode_t mode_strip_sgid(struct user_namespace *mnt_userns, -- cgit v1.2.3 From 01beba7957a26f9b7179127e8ad56bb5a0f56138 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:26 +0100 Subject: fs: port inode_owner_or_capable() to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c1d698923d15..e6c76f308f5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1939,7 +1939,7 @@ static inline bool sb_start_intwrite_trylock(struct super_block *sb) return __sb_start_write_trylock(sb, SB_FREEZE_FS); } -bool inode_owner_or_capable(struct user_namespace *mnt_userns, +bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode); /* -- cgit v1.2.3 From 9452e93e6dae862d7aeff2b11236d79bde6f9b66 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:27 +0100 Subject: fs: port privilege checking helpers to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/capability.h | 4 ++-- include/linux/fs.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 0a8ba82ef1af..03c2a613ad40 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -249,9 +249,9 @@ static inline bool ns_capable_setid(struct user_namespace *ns, int cap) } #endif /* CONFIG_MULTIUSER */ bool privileged_wrt_inode_uidgid(struct user_namespace *ns, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const struct inode *inode); -bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns, +bool capable_wrt_inode_uidgid(struct mnt_idmap *idmap, const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); diff --git a/include/linux/fs.h b/include/linux/fs.h index e6c76f308f5f..696540a86183 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2017,7 +2017,7 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); -umode_t mode_strip_sgid(struct user_namespace *mnt_userns, +umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode); /* @@ -2917,7 +2917,7 @@ static inline int path_permission(const struct path *path, int mask) return inode_permission(mnt_idmap(path->mnt), d_inode(path->dentry), mask); } -int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir, +int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) @@ -3105,7 +3105,7 @@ extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); -extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *); +extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); extern int file_remove_privs(struct file *); /* @@ -3539,13 +3539,13 @@ static inline bool is_sxid(umode_t mode) return mode & (S_ISUID | S_ISGID); } -static inline int check_sticky(struct user_namespace *mnt_userns, +static inline int check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { if (!(dir->i_mode & S_ISVTX)) return 0; - return __check_sticky(mnt_userns, dir, inode); + return __check_sticky(idmap, dir, inode); } static inline void inode_has_no_xattr(struct inode *inode) -- cgit v1.2.3 From f861646a65623bcff91d544acbc4413d62d97b79 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:28 +0100 Subject: quota: port to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/quotaops.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 0d8625d71733..5f6744b3ceac 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,9 +20,10 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) } /* i_mutex must being held */ -static inline bool is_quota_modification(struct user_namespace *mnt_userns, +static inline bool is_quota_modification(struct mnt_idmap *idmap, struct inode *inode, struct iattr *ia) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); return ((ia->ia_valid & ATTR_SIZE) || i_uid_needs_update(mnt_userns, ia, inode) || i_gid_needs_update(mnt_userns, ia, inode)); @@ -116,7 +117,7 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); -int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode, +int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr); static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) @@ -236,7 +237,7 @@ static inline void dquot_free_inode(struct inode *inode) { } -static inline int dquot_transfer(struct user_namespace *mnt_userns, +static inline int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { return 0; -- cgit v1.2.3 From 0dbe12f2e49c046444461b5f4be49df2cafb3a40 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:29 +0100 Subject: fs: port i_{g,u}id_{needs_}update() to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 24 ++++++++++++++++-------- include/linux/quotaops.h | 5 ++--- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 696540a86183..3611d459bf88 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1647,7 +1647,7 @@ static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns, /** * i_uid_needs_update - check whether inode's i_uid needs to be updated - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * @@ -1656,10 +1656,12 @@ static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns, * * Return: true if @inode's i_uid field needs to be updated, false if not. */ -static inline bool i_uid_needs_update(struct user_namespace *mnt_userns, +static inline bool i_uid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); + return ((attr->ia_valid & ATTR_UID) && !vfsuid_eq(attr->ia_vfsuid, i_uid_into_vfsuid(mnt_userns, inode))); @@ -1667,17 +1669,19 @@ static inline bool i_uid_needs_update(struct user_namespace *mnt_userns, /** * i_uid_update - update @inode's i_uid field - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_uid field translating the vfsuid of any idmapped * mount into the filesystem kuid. */ -static inline void i_uid_update(struct user_namespace *mnt_userns, +static inline void i_uid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); + if (attr->ia_valid & ATTR_UID) inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode), attr->ia_vfsuid); @@ -1699,7 +1703,7 @@ static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns, /** * i_gid_needs_update - check whether inode's i_gid needs to be updated - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * @@ -1708,10 +1712,12 @@ static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns, * * Return: true if @inode's i_gid field needs to be updated, false if not. */ -static inline bool i_gid_needs_update(struct user_namespace *mnt_userns, +static inline bool i_gid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); + return ((attr->ia_valid & ATTR_GID) && !vfsgid_eq(attr->ia_vfsgid, i_gid_into_vfsgid(mnt_userns, inode))); @@ -1719,17 +1725,19 @@ static inline bool i_gid_needs_update(struct user_namespace *mnt_userns, /** * i_gid_update - update @inode's i_gid field - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_gid field translating the vfsgid of any idmapped * mount into the filesystem kgid. */ -static inline void i_gid_update(struct user_namespace *mnt_userns, +static inline void i_gid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); + if (attr->ia_valid & ATTR_GID) inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode), attr->ia_vfsgid); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 5f6744b3ceac..11a4becff3a9 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -23,10 +23,9 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) static inline bool is_quota_modification(struct mnt_idmap *idmap, struct inode *inode, struct iattr *ia) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); return ((ia->ia_valid & ATTR_SIZE) || - i_uid_needs_update(mnt_userns, ia, inode) || - i_gid_needs_update(mnt_userns, ia, inode)); + i_uid_needs_update(idmap, ia, inode) || + i_gid_needs_update(idmap, ia, inode)); } #if defined(CONFIG_QUOTA) -- cgit v1.2.3 From e67fe63341b8117d7e0d9acf0f1222d5138b9266 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:30 +0100 Subject: fs: port i_{g,u}id_into_vfs{g,u}id() to mnt_idmap Convert to struct mnt_idmap. Remove legacy file_mnt_user_ns() and mnt_user_ns(). Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 37 ++++++++++++++----------------------- include/linux/mount.h | 1 - 2 files changed, 14 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3611d459bf88..173c5274a63a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1632,16 +1632,17 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) } /** - * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns - * @mnt_userns: user namespace of the mount the inode was found from + * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping + * @idmap: idmap of the mount the inode was found from * @inode: inode to map * - * Return: whe inode's i_uid mapped down according to @mnt_userns. + * Return: whe inode's i_uid mapped down according to @idmap. * If the inode's i_uid has no mapping INVALID_VFSUID is returned. */ -static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns, +static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap, const struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid); } @@ -1660,11 +1661,9 @@ static inline bool i_uid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - return ((attr->ia_valid & ATTR_UID) && !vfsuid_eq(attr->ia_vfsuid, - i_uid_into_vfsuid(mnt_userns, inode))); + i_uid_into_vfsuid(idmap, inode))); } /** @@ -1688,16 +1687,17 @@ static inline void i_uid_update(struct mnt_idmap *idmap, } /** - * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns - * @mnt_userns: user namespace of the mount the inode was found from + * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping + * @idmap: idmap of the mount the inode was found from * @inode: inode to map * - * Return: the inode's i_gid mapped down according to @mnt_userns. + * Return: the inode's i_gid mapped down according to @idmap. * If the inode's i_gid has no mapping INVALID_VFSGID is returned. */ -static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns, +static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid); } @@ -1716,11 +1716,9 @@ static inline bool i_gid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - return ((attr->ia_valid & ATTR_GID) && !vfsgid_eq(attr->ia_vfsgid, - i_gid_into_vfsgid(mnt_userns, inode))); + i_gid_into_vfsgid(idmap, inode))); } /** @@ -2334,10 +2332,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - - return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) || - !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)); + return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || + !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)); } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) @@ -2732,11 +2728,6 @@ struct filename { }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); -static inline struct user_namespace *file_mnt_user_ns(struct file *file) -{ - return mnt_user_ns(file->f_path.mnt); -} - static inline struct mnt_idmap *file_mnt_idmap(struct file *file) { return mnt_idmap(file->f_path.mnt); diff --git a/include/linux/mount.h b/include/linux/mount.h index 62475996fac6..02db5909d5c2 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -74,7 +74,6 @@ struct vfsmount { struct mnt_idmap *mnt_idmap; } __randomize_layout; -struct user_namespace *mnt_user_ns(const struct vfsmount *mnt); struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap); static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { -- cgit v1.2.3 From c14329d39f2daa8132e1bbe5cc531da387bcf44a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:31 +0100 Subject: fs: port fs{g,u}id helpers to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 21 ++++++++++----------- include/linux/mnt_idmapping.h | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 173c5274a63a..54a95ed68322 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1744,29 +1744,29 @@ static inline void i_gid_update(struct mnt_idmap *idmap, /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * * Initialize the i_uid field of @inode. If the inode was found/created via - * an idmapped mount map the caller's fsuid according to @mnt_users. + * an idmapped mount map the caller's fsuid according to @idmap. */ static inline void inode_fsuid_set(struct inode *inode, - struct user_namespace *mnt_userns) + struct mnt_idmap *idmap) { - inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); + inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } /** * inode_fsgid_set - initialize inode's i_gid field with callers fsgid * @inode: inode to initialize - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * * Initialize the i_gid field of @inode. If the inode was found/created via - * an idmapped mount map the caller's fsgid according to @mnt_users. + * an idmapped mount map the caller's fsgid according to @idmap. */ static inline void inode_fsgid_set(struct inode *inode, - struct user_namespace *mnt_userns) + struct mnt_idmap *idmap) { - inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); + inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); } /** @@ -1784,14 +1784,13 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct mnt_idmap *idmap) { struct user_namespace *fs_userns = sb->s_user_ns; - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); kuid_t kuid; kgid_t kgid; - kuid = mapped_fsuid(mnt_userns, fs_userns); + kuid = mapped_fsuid(idmap, fs_userns); if (!uid_valid(kuid)) return false; - kgid = mapped_fsgid(mnt_userns, fs_userns); + kgid = mapped_fsgid(idmap, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 0ccca33a7a6d..d63e7c84a389 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -375,8 +375,8 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) } /** - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: the mount's idmapping + * mapped_fsuid - return caller's fsuid mapped according to an idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on @@ -385,18 +385,19 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) * O_CREAT. Other examples include the allocation of quotas for a specific * user. * - * Return: the caller's current fsuid mapped up according to @mnt_userns. + * Return: the caller's current fsuid mapped up according to @idmap. */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, +static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(current_fsuid())); } /** - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: the mount's idmapping + * mapped_fsgid - return caller's fsgid mapped according to an idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on @@ -405,11 +406,12 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, * O_CREAT. Other examples include the allocation of quotas for a specific * user. * - * Return: the caller's current fsgid mapped up according to @mnt_userns. + * Return: the caller's current fsgid mapped up according to @idmap. */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, +static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(current_fsgid())); } -- cgit v1.2.3 From 4d7ca4090184c153f8ccb1a68ca5cf136dac108b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:32 +0100 Subject: fs: port vfs{g,u}id helpers to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 14 ++++---------- include/linux/mnt_idmapping.h | 40 ++++++++++++++++++++-------------------- 2 files changed, 24 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 54a95ed68322..bd040fe31014 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1642,8 +1642,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap, const struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid); + return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid); } /** @@ -1679,10 +1678,8 @@ static inline void i_uid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - if (attr->ia_valid & ATTR_UID) - inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode), + inode->i_uid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); } @@ -1697,8 +1694,7 @@ static inline void i_uid_update(struct mnt_idmap *idmap, static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid); + return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid); } /** @@ -1734,10 +1730,8 @@ static inline void i_gid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - if (attr->ia_valid & ATTR_GID) - inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode), + inode->i_gid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); } diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index d63e7c84a389..53d42b0b6f17 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -170,7 +170,7 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns, /** * make_vfsuid - map a filesystem kuid into a mnt_userns - * @mnt_userns: the mount's idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kuid : kuid to be mapped * @@ -189,11 +189,12 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns, * returned. */ -static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, +static inline vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid) { uid_t uid; + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); if (no_idmapping(mnt_userns, fs_userns)) return VFSUIDT_INIT(kuid); @@ -208,7 +209,7 @@ static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, /** * make_vfsgid - map a filesystem kgid into a mnt_userns - * @mnt_userns: the mount's idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kgid : kgid to be mapped * @@ -227,11 +228,12 @@ static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, * returned. */ -static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, +static inline vfsgid_t make_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kgid_t kgid) { gid_t gid; + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); if (no_idmapping(mnt_userns, fs_userns)) return VFSGIDT_INIT(kgid); @@ -246,7 +248,7 @@ static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, /** * from_vfsuid - map a vfsuid into the filesystem idmapping - * @mnt_userns: the mount's idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsuid : vfsuid to be mapped * @@ -255,11 +257,12 @@ static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, * * Return: @vfsuid mapped into the filesystem idmapping */ -static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, +static inline kuid_t from_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid) { uid_t uid; + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); if (no_idmapping(mnt_userns, fs_userns)) return AS_KUIDT(vfsuid); @@ -273,7 +276,7 @@ static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, /** * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem - * @mnt_userns: the mount's idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsuid: vfsuid to be mapped * @@ -283,11 +286,11 @@ static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, * * Return: true if @vfsuid has a mapping in the filesystem, false if not. */ -static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns, +static inline bool vfsuid_has_fsmapping(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid) { - return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid)); + return uid_valid(from_vfsuid(idmap, fs_userns, vfsuid)); } static inline bool vfsuid_has_mapping(struct user_namespace *userns, @@ -311,7 +314,7 @@ static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) /** * from_vfsgid - map a vfsgid into the filesystem idmapping - * @mnt_userns: the mount's idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsgid : vfsgid to be mapped * @@ -320,11 +323,12 @@ static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) * * Return: @vfsgid mapped into the filesystem idmapping */ -static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, +static inline kgid_t from_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid) { gid_t gid; + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); if (no_idmapping(mnt_userns, fs_userns)) return AS_KGIDT(vfsgid); @@ -338,7 +342,7 @@ static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, /** * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem - * @mnt_userns: the mount's idmapping + * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsgid: vfsgid to be mapped * @@ -348,11 +352,11 @@ static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, * * Return: true if @vfsgid has a mapping in the filesystem, false if not. */ -static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, +static inline bool vfsgid_has_fsmapping(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid) { - return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid)); + return gid_valid(from_vfsgid(idmap, fs_userns, vfsgid)); } static inline bool vfsgid_has_mapping(struct user_namespace *userns, @@ -390,9 +394,7 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - return from_vfsuid(mnt_userns, fs_userns, - VFSUIDT_INIT(current_fsuid())); + return from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(current_fsuid())); } /** @@ -411,9 +413,7 @@ static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap, static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - return from_vfsgid(mnt_userns, fs_userns, - VFSGIDT_INIT(current_fsgid())); + return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid())); } #endif /* _LINUX_MNT_IDMAPPING_H */ -- cgit v1.2.3 From 3707d84c13670bf09b4a9a4dc6733326d8344b31 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:33 +0100 Subject: fs: move mnt_idmap Now that we converted everything to just rely on struct mnt_idmap move it all into a separate file. This ensure that no code can poke around in struct mnt_idmap without any dedicated helpers and makes it easier to extend it in the future. Filesystems will now not be able to conflate mount and filesystem idmappings as they are two distinct types and require distinct helpers that cannot be used interchangeably. We are now also able to extend struct mnt_idmap as we see fit. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 196 +++--------------------------------------- include/linux/mount.h | 1 - 2 files changed, 12 insertions(+), 185 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 53d42b0b6f17..057c89867aa2 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -113,166 +113,19 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) #define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) } #define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) } -#ifdef CONFIG_MULTIUSER -/** - * vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups - * @vfsgid: the mnt gid to match - * - * This function can be used to determine whether @vfsuid matches any of the - * caller's groups. - * - * Return: 1 if vfsuid matches caller's groups, 0 if not. - */ -static inline int vfsgid_in_group_p(vfsgid_t vfsgid) -{ - return in_group_p(AS_KGIDT(vfsgid)); -} -#else -static inline int vfsgid_in_group_p(vfsgid_t vfsgid) -{ - return 1; -} -#endif +int vfsgid_in_group_p(vfsgid_t vfsgid); -/** - * initial_idmapping - check whether this is the initial mapping - * @ns: idmapping to check - * - * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, - * [...], 1000 to 1000 [...]. - * - * Return: true if this is the initial mapping, false if not. - */ -static inline bool initial_idmapping(const struct user_namespace *ns) -{ - return ns == &init_user_ns; -} +vfsuid_t make_vfsuid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, kuid_t kuid); -/** - * no_idmapping - check whether we can skip remapping a kuid/gid - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * - * This function can be used to check whether a remapping between two - * idmappings is required. - * An idmapped mount is a mount that has an idmapping attached to it that - * is different from the filsystem's idmapping and the initial idmapping. - * If the initial mapping is used or the idmapping of the mount and the - * filesystem are identical no remapping is required. - * - * Return: true if remapping can be skipped, false if not. - */ -static inline bool no_idmapping(const struct user_namespace *mnt_userns, - const struct user_namespace *fs_userns) -{ - return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; -} +vfsgid_t make_vfsgid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, kgid_t kgid); -/** - * make_vfsuid - map a filesystem kuid into a mnt_userns - * @idmap: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @kuid : kuid to be mapped - * - * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this - * function when preparing a @kuid to be reported to userspace. - * - * If no_idmapping() determines that this is not an idmapped mount we can - * simply return @kuid unchanged. - * If initial_idmapping() tells us that the filesystem is not mounted with an - * idmapping we know the value of @kuid won't change when calling - * from_kuid() so we can simply retrieve the value via __kuid_val() - * directly. - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is - * returned. - */ +kuid_t from_vfsuid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, vfsuid_t vfsuid); -static inline vfsuid_t make_vfsuid(struct mnt_idmap *idmap, - struct user_namespace *fs_userns, - kuid_t kuid) -{ - uid_t uid; - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - - if (no_idmapping(mnt_userns, fs_userns)) - return VFSUIDT_INIT(kuid); - if (initial_idmapping(fs_userns)) - uid = __kuid_val(kuid); - else - uid = from_kuid(fs_userns, kuid); - if (uid == (uid_t)-1) - return INVALID_VFSUID; - return VFSUIDT_INIT(make_kuid(mnt_userns, uid)); -} - -/** - * make_vfsgid - map a filesystem kgid into a mnt_userns - * @idmap: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @kgid : kgid to be mapped - * - * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this - * function when preparing a @kgid to be reported to userspace. - * - * If no_idmapping() determines that this is not an idmapped mount we can - * simply return @kgid unchanged. - * If initial_idmapping() tells us that the filesystem is not mounted with an - * idmapping we know the value of @kgid won't change when calling - * from_kgid() so we can simply retrieve the value via __kgid_val() - * directly. - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is - * returned. - */ - -static inline vfsgid_t make_vfsgid(struct mnt_idmap *idmap, - struct user_namespace *fs_userns, - kgid_t kgid) -{ - gid_t gid; - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - - if (no_idmapping(mnt_userns, fs_userns)) - return VFSGIDT_INIT(kgid); - if (initial_idmapping(fs_userns)) - gid = __kgid_val(kgid); - else - gid = from_kgid(fs_userns, kgid); - if (gid == (gid_t)-1) - return INVALID_VFSGID; - return VFSGIDT_INIT(make_kgid(mnt_userns, gid)); -} - -/** - * from_vfsuid - map a vfsuid into the filesystem idmapping - * @idmap: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @vfsuid : vfsuid to be mapped - * - * Map @vfsuid into the filesystem idmapping. This function has to be used in - * order to e.g. write @vfsuid to inode->i_uid. - * - * Return: @vfsuid mapped into the filesystem idmapping - */ -static inline kuid_t from_vfsuid(struct mnt_idmap *idmap, - struct user_namespace *fs_userns, - vfsuid_t vfsuid) -{ - uid_t uid; - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - - if (no_idmapping(mnt_userns, fs_userns)) - return AS_KUIDT(vfsuid); - uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid)); - if (uid == (uid_t)-1) - return INVALID_UID; - if (initial_idmapping(fs_userns)) - return KUIDT_INIT(uid); - return make_kuid(fs_userns, uid); -} +kgid_t from_vfsgid(struct mnt_idmap *idmap, + struct user_namespace *fs_userns, vfsgid_t vfsgid); /** * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem @@ -312,34 +165,6 @@ static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) return AS_KUIDT(vfsuid); } -/** - * from_vfsgid - map a vfsgid into the filesystem idmapping - * @idmap: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @vfsgid : vfsgid to be mapped - * - * Map @vfsgid into the filesystem idmapping. This function has to be used in - * order to e.g. write @vfsgid to inode->i_gid. - * - * Return: @vfsgid mapped into the filesystem idmapping - */ -static inline kgid_t from_vfsgid(struct mnt_idmap *idmap, - struct user_namespace *fs_userns, - vfsgid_t vfsgid) -{ - gid_t gid; - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); - - if (no_idmapping(mnt_userns, fs_userns)) - return AS_KGIDT(vfsgid); - gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid)); - if (gid == (gid_t)-1) - return INVALID_GID; - if (initial_idmapping(fs_userns)) - return KGIDT_INIT(gid); - return make_kgid(fs_userns, gid); -} - /** * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem * @idmap: the mount's idmapping @@ -416,4 +241,7 @@ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid())); } +bool check_fsmapping(const struct mnt_idmap *idmap, + const struct super_block *sb); + #endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 02db5909d5c2..52f452b2259a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -74,7 +74,6 @@ struct vfsmount { struct mnt_idmap *mnt_idmap; } __randomize_layout; -struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap); static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ -- cgit v1.2.3 From 2c3e674465e73e2f7eb52c39dc5c5e97e78e68ea Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:50:47 +0000 Subject: firmware: arm_scmi: Refactor device create/destroy helpers Refactor SCMI device create/destroy helpers: it is now possible to ask for the creation of all the currently requested devices for a whole protocol, not only for the creation of a single well-defined device. While at that, re-instate uniqueness checks on the creation of SCMI SystemPower devices. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222185049.737625-8-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 4f765bc788ff..0ce5746a4470 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -804,11 +804,6 @@ struct scmi_device { #define to_scmi_dev(d) container_of(d, struct scmi_device, dev) -struct scmi_device * -scmi_device_create(struct device_node *np, struct device *parent, int protocol, - const char *name); -void scmi_device_destroy(struct scmi_device *scmi_dev); - struct scmi_device_id { u8 protocol_id; const char *name; -- cgit v1.2.3 From 338a588e9db3c5ea7a35bb332cb3bdb532fd1f08 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:14 +0000 Subject: coresight: trace-id: Add API to dynamically assign Trace ID values The existing mechanism to assign Trace ID values to sources is limited and does not scale for larger multicore / multi trace source systems. The API introduces functions that reserve IDs based on availabilty represented by a coresight_trace_id_map structure. This records the used and free IDs in a bitmap. CPU bound sources such as ETMs use the coresight_trace_id_get_cpu_id coresight_trace_id_put_cpu_id pair of functions. The API will record the ID associated with the CPU. This ensures that the same ID will be re-used while perf events are active on the CPU. The put_cpu_id function will pend release of the ID until all perf cs_etm sessions are complete. For backward compatibility the functions will attempt to use the same CPU IDs as the legacy system would have used if these are still available. Non-cpu sources, such as the STM can use coresight_trace_id_get_system_id / coresight_trace_id_put_system_id. Signed-off-by: Mike Leach [ Fix checkpatch warning in drivers/hwtracing/coresight/coresight-trace-id.c ] Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-2-mike.leach@linaro.org --- include/linux/coresight-pmu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 6c2fd6cc5a98..ffff4e6277e5 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -10,6 +10,16 @@ #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10 +/* + * The legacy Trace ID system based on fixed calculation from the cpu + * number. This has been replaced by drivers using a dynamic allocation + * system - but need to retain the legacy algorithm for backward comparibility + * in certain situations:- + * a) new perf running on older systems that generate the legacy mapping + * b) older tools that may not update at the same time as the kernel. + */ +#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) + /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. -- cgit v1.2.3 From 42708bac18cf7f09c058058cd4564f879c53b900 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:20 +0000 Subject: coresight: etmX.X: stm: Remove trace_id() callback CoreSight sources provide a callback (.trace_id) in the standard source ops which returns the ID to the core code. This was used to check that sources all had a unique Trace ID. Uniqueness is now gauranteed by the Trace ID allocation system, and the check code has been removed from the core. This patch removes the unneeded and unused .trace_id source ops from the ops structure and implementations in etm3x, etm4x and stm. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-8-mike.leach@linaro.org --- include/linux/coresight.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 1554021231f9..e241eb88dfb9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -314,14 +314,11 @@ struct coresight_ops_link { * Operations available for sources. * @cpu_id: returns the value of the CPU number this component * is associated to. - * @trace_id: returns the value of the component's trace ID as known - * to the HW. * @enable: enables tracing for a source. * @disable: disables tracing for a source. */ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); - int (*trace_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, u32 mode); void (*disable)(struct coresight_device *csdev, -- cgit v1.2.3 From 206bb3858949b6509de75f7d3697303a073cbaa1 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:21 +0000 Subject: coresight: trace id: Remove legacy get trace ID function. Removes legacy coresight_get_trace_id() function now its use has been removed from the ETM code. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-9-mike.leach@linaro.org --- include/linux/coresight-pmu.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index ffff4e6277e5..624f4843453e 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -8,7 +8,6 @@ #define _LINUX_CORESIGHT_PMU_H #define CORESIGHT_ETM_PMU_NAME "cs_etm" -#define CORESIGHT_ETM_PMU_SEED 0x10 /* * The legacy Trace ID system based on fixed calculation from the cpu @@ -44,15 +43,4 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15 -static inline int coresight_get_trace_id(int cpu) -{ - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. Since - * the common convention is to have data trace IDs be I(N) + 1, - * set instruction trace IDs as a function of the CPU number. - */ - return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)); -} - #endif -- cgit v1.2.3 From aa19bb4c35834dd574b36d482cc44c78816e6fcd Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:26 +0000 Subject: coresight: events: PERF_RECORD_AUX_OUTPUT_HW_ID used for Trace ID Use the perf_report_aux_output_id() call to output the CoreSight trace ID and associated CPU as a PERF_RECORD_AUX_OUTPUT_HW_ID record in the perf.data file. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-14-mike.leach@linaro.org --- include/linux/coresight-pmu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 624f4843453e..51ac441a37c3 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -7,6 +7,8 @@ #ifndef _LINUX_CORESIGHT_PMU_H #define _LINUX_CORESIGHT_PMU_H +#include + #define CORESIGHT_ETM_PMU_NAME "cs_etm" /* @@ -43,4 +45,16 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15 +/* + * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. + * Used to associate a CPU with the CoreSight Trace ID. + * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. + * [59:08] - Unused (SBZ) + * [63:60] - Version + */ +#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) +#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_CURR_VERSION 0 + #endif -- cgit v1.2.3 From 93c473948c588978cd55d9a3adad8b3e8057aa21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3=20=C3=81gila=20Bitsch?= Date: Fri, 13 Jan 2023 01:53:19 +0100 Subject: usb: gadget: add WebUSB landing page support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a custom (non-USB IF) extension to the USB standard: https://wicg.github.io/webusb/ This specification is published under the W3C Community Contributor Agreement, which in particular allows to implement the specification without any royalties. The specification allows USB gadgets to announce an URL to landing page and describes a Javascript interface for websites to interact with the USB gadget, if the user allows it. It is currently supported by Chromium-based browsers, such as Chrome, Edge and Opera on all major operating systems including Linux. This patch adds optional support for Linux-based USB gadgets wishing to expose such a landing page. During device enumeration, a host recognizes that the announced USB version is at least 2.01, which means, that there are BOS descriptors available. The device than announces WebUSB support using a platform device capability. This includes a vendor code under which the landing page URL can be retrieved using a vendor-specific request. Previously, the BOS descriptors would unconditionally include an LPM related descriptor, as BOS descriptors were only ever sent when the device was LPM capable. As this is no longer the case, this patch puts this descriptor behind a lpm_capable condition. Usage is modeled after os_desc descriptors: echo 1 > webusb/use echo "https://www.kernel.org" > webusb/landingPage lsusb will report the device with the following lines: Platform Device Capability: bLength 24 bDescriptorType 16 bDevCapabilityType 5 bReserved 0 PlatformCapabilityUUID {3408b638-09a9-47a0-8bfd-a0768815b665} WebUSB: bcdVersion 1.00 bVendorCode 0 iLandingPage 1 https://www.kernel.org Signed-off-by: Jó Ágila Bitsch Link: https://lore.kernel.org/r/Y8Crf8P2qAWuuk/F@jo-einhundert Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 7 ++++ include/linux/usb/webusb.h | 83 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 include/linux/usb/webusb.h (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 43ac3fa760db..91d22c3ed458 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -474,6 +475,12 @@ struct usb_composite_dev { struct usb_configuration *os_desc_config; unsigned int use_os_string:1; + /* WebUSB */ + u16 bcd_webusb_version; + u8 b_webusb_vendor_code; + char landing_page[WEBUSB_URL_RAW_MAX_LENGTH]; + unsigned int use_webusb:1; + /* private: */ /* internals */ unsigned int suspended:1; diff --git a/include/linux/usb/webusb.h b/include/linux/usb/webusb.h new file mode 100644 index 000000000000..b430d84357f3 --- /dev/null +++ b/include/linux/usb/webusb.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * WebUSB descriptors and constants + * + * Copyright (C) 2023 Jó Ágila Bitsch + */ + +#ifndef __LINUX_USB_WEBUSB_H +#define __LINUX_USB_WEBUSB_H + +#include "uapi/linux/usb/ch9.h" + +/* + * little endian PlatformCapablityUUID for WebUSB + * 3408b638-09a9-47a0-8bfd-a0768815b665 + * to identify Platform Device Capability descriptors as referring to WebUSB + * + * the UUID above MUST be sent over the wire as the byte sequence: + * {0x38, 0xB6, 0x08, 0x34, 0xA9, 0x09, 0xA0, 0x47, 0x8B, 0xFD, 0xA0, 0x76, 0x88, 0x15, 0xB6, 0x65}. + */ +#define WEBUSB_UUID \ + UUID_INIT(0x38b60834, 0xa909, 0xa047, 0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65) + +/* + * WebUSB Platform Capability data + * + * A device announces support for the + * WebUSB command set by including the following Platform Descriptor Data in its + * Binary Object Store associated with the WebUSB_UUID above. + * See: https://wicg.github.io/webusb/#webusb-platform-capability-descriptor + */ +struct usb_webusb_cap_data { + __le16 bcdVersion; +#define WEBUSB_VERSION_1_00 cpu_to_le16(0x0100) /* currently only version 1.00 is defined */ + u8 bVendorCode; + u8 iLandingPage; +#define WEBUSB_LANDING_PAGE_NOT_PRESENT 0 +#define WEBUSB_LANDING_PAGE_PRESENT 1 /* we chose the fixed index 1 for the URL descriptor */ +} __packed; + +#define USB_WEBUSB_CAP_DATA_SIZE 4 + +/* + * Get URL Request + * + * The request to fetch an URL is defined in https://wicg.github.io/webusb/#get-url as: + * bmRequestType: (USB_DIR_IN | USB_TYPE_VENDOR) = 11000000B + * bRequest: bVendorCode + * wValue: iLandingPage + * wIndex: GET_URL = 2 + * wLength: Descriptor Length (typically U8_MAX = 255) + * Data: URL Descriptor + */ +#define WEBUSB_GET_URL 2 + +/* + * This descriptor contains a single URL and is returned by the Get URL request. + * + * See: https://wicg.github.io/webusb/#url-descriptor + */ +struct webusb_url_descriptor { + u8 bLength; +#define WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH 3 + u8 bDescriptorType; +#define WEBUSB_URL_DESCRIPTOR_TYPE 3 + u8 bScheme; +#define WEBUSB_URL_SCHEME_HTTP 0 +#define WEBUSB_URL_SCHEME_HTTPS 1 +#define WEBUSB_URL_SCHEME_NONE 255 + u8 URL[U8_MAX - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH]; +} __packed; + +/* + * Buffer size to hold the longest URL that can be in an URL descriptor + * + * The descriptor can be U8_MAX bytes long. + * WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH bytes are used for a header. + * Since the longest prefix that might be stripped is "https://", we may accommodate an additional + * 8 bytes. + */ +#define WEBUSB_URL_RAW_MAX_LENGTH (U8_MAX - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + 8) + +#endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From 52af7863508e94f3e05e43a3f2f3943a71dcffab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Jan 2023 17:21:20 +0200 Subject: device property: Make fwnode_graph_for_each_endpoint() consistent Make fwnode_graph_for_each_endpoint() consistent with the rest of for_each_*() definitions in the file, i.e. use the form of for (iter = func(NULL); iter; \ iter = func(iter)) as it's done in all the rest of the similar macro definitions. Signed-off-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Acked-by: Sakari Ailus Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230117152120.42531-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 1ffd4f9bb67b..0a29db15ff34 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -436,9 +436,9 @@ fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode, unsigned long flags); -#define fwnode_graph_for_each_endpoint(fwnode, child) \ - for (child = NULL; \ - (child = fwnode_graph_get_next_endpoint(fwnode, child)); ) +#define fwnode_graph_for_each_endpoint(fwnode, child) \ + for (child = fwnode_graph_get_next_endpoint(fwnode, NULL); child; \ + child = fwnode_graph_get_next_endpoint(fwnode, child)) int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); -- cgit v1.2.3 From e2192de59e457aef8d1f055a452131f0b3e5d097 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jan 2023 14:26:53 +0100 Subject: bitfield: add FIELD_PREP_CONST() Neither FIELD_PREP() nor *_encode_bits() can be used in constant contexts (such as initializers), but we don't want to define shift constants for all masks just for use in initializers, and having checks that the values fit is also useful. Therefore, add FIELD_PREP_CONST() which is a smaller version of FIELD_PREP() that can only take constant arguments and has less friendly (but not less strict) error checks, and expands to a constant value. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20230118142652.53f20593504b.Iaeea0aee77a6493d70e573b4aa55c91c00e01e4b@changeid Signed-off-by: Johannes Berg --- include/linux/bitfield.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index c9be1657f03d..ebfa12f69501 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -115,6 +115,32 @@ ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ }) +#define __BF_CHECK_POW2(n) BUILD_BUG_ON_ZERO(((n) & ((n) - 1)) != 0) + +/** + * FIELD_PREP_CONST() - prepare a constant bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP_CONST() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + * + * Unlike FIELD_PREP() this is a constant expression and can therefore + * be used in initializers. Error checking is less comfortable for this + * version, and non-constant masks cannot be used. + */ +#define FIELD_PREP_CONST(_mask, _val) \ + ( \ + /* mask must be non-zero */ \ + BUILD_BUG_ON_ZERO((_mask) == 0) + \ + /* check if value fits */ \ + BUILD_BUG_ON_ZERO(~((_mask) >> __bf_shf(_mask)) & (_val)) + \ + /* check if mask is contiguous */ \ + __BF_CHECK_POW2((_mask) + (1ULL << __bf_shf(_mask))) + \ + /* and create the value */ \ + (((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask)) \ + ) + /** * FIELD_GET() - extract a bitfield element * @_mask: shifted mask defining the field's length and position -- cgit v1.2.3 From 41000b03af9e15075061cdbeea461cfa5e12a8eb Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 24 Nov 2022 13:39:08 +0100 Subject: earlycon: Increase options size Now that the clock frequency is also part of the options, 16 bytes is too little. Without this patch dmesg does not show the whole options, Eg: earlycon: uart0 at MMIO32 0x00000000fedc9000 (options '115200n8,480000') instead of: '115200n8,48000000' Signed-off-by: Ricardo Ribalda Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20221123-serial-clk-v3-2-49c516980ae0@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index fd59f600094a..026294c6ccb8 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -781,7 +781,7 @@ static inline int uart_poll_timeout(struct uart_port *port) struct earlycon_device { struct console *con; struct uart_port port; - char options[16]; /* e.g., 115200n8 */ + char options[32]; /* e.g., 115200n8 */ unsigned int baud; }; -- cgit v1.2.3 From ef460db2a7c1df5eda2ea6011e7586e54e23b8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 25 Nov 2022 15:05:04 +0200 Subject: serial: 8250: Use defined IER bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of literal 0x0f, add a define for enabling all IER bits the 8250 driver is interested in. Don't make the define for combined flags part of UAPI. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221125130509.8482-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index 3d6fe3ef92cf..ad6e1c37e2d5 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -12,6 +12,11 @@ #include #include +#define UART_IER_ALL_INTR (UART_IER_MSI | \ + UART_IER_RLSI | \ + UART_IER_THRI | \ + UART_IER_RDI) + /* Helper for dealing with UART_LCR_WLEN* defines */ #define UART_LCR_WLEN(x) ((x) - 5) -- cgit v1.2.3 From d9c1d3cbdeec94f077679b73ed5ce3a4fe4bf4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 25 Nov 2022 15:05:05 +0200 Subject: serial: 8250: Name MSR literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add UART_MSR_STATUS_BITS for CD, RI, DSR & CTS. Use names for the literal. Don't make the define for combined flags part of UAPI. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221125130509.8482-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index ad6e1c37e2d5..bfda927dde15 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -28,6 +28,11 @@ static inline bool uart_lsr_tx_empty(u16 lsr) return (lsr & UART_LSR_BOTH_EMPTY) == UART_LSR_BOTH_EMPTY; } +#define UART_MSR_STATUS_BITS (UART_MSR_DCD | \ + UART_MSR_RI | \ + UART_MSR_DSR | \ + UART_MSR_CTS) + /* * Counters of the input lines (CTS, DSR, RI, CD) interrupts */ -- cgit v1.2.3 From feb36abbedea2644bf31693aa287a33a3a9fbd7c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 12 Jan 2023 09:01:31 +0100 Subject: tty: vt: remove struct uni_screen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It contains only lines with pointers to characters (u32s). So use simple clear 'u32 **lines' all over the code. This avoids zero-length arrays. It also makes the allocation less error-prone (size of the struct wasn't taken into account at all). Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230112080136.4929-6-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 1518568aaf0f..539f1cd45309 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -18,7 +18,6 @@ #include struct uni_pagedict; -struct uni_screen; #define NPAR 16 #define VC_TABSTOPS_COUNT 256U @@ -159,7 +158,7 @@ struct vc_data { struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ struct uni_pagedict *uni_pagedict; struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ - struct uni_screen *vc_uni_screen; /* unicode screen content */ + u32 **vc_uni_lines; /* unicode screen content */ /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From 7de06d8455211d2d875bb174e361ccb6faa5ae3f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 29 Dec 2022 16:50:29 +0100 Subject: soc: qcom-geni-se: add more symbol definitions The following symbols will be used when adding support for SE DMA in the qcom geni serial driver. Signed-off-by: Bartosz Golaszewski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221229155030.418800-14-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/qcom-geni-se.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index f5672785c0c4..400213daa461 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -103,6 +103,7 @@ struct geni_se { #define SE_DMA_TX_FSM_RST 0xc58 #define SE_DMA_RX_IRQ_STAT 0xd40 #define SE_DMA_RX_IRQ_CLR 0xd44 +#define SE_DMA_RX_LEN_IN 0xd54 #define SE_DMA_RX_FSM_RST 0xd58 #define SE_HW_PARAM_0 0xe24 #define SE_HW_PARAM_1 0xe28 @@ -235,6 +236,8 @@ struct geni_se { #define RX_SBE BIT(2) #define RX_RESET_DONE BIT(3) #define RX_FLUSH_DONE BIT(4) +#define RX_DMA_PARITY_ERR BIT(5) +#define RX_DMA_BREAK GENMASK(8, 7) #define RX_GENI_GP_IRQ GENMASK(10, 5) #define RX_GENI_CANCEL_IRQ BIT(11) #define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) -- cgit v1.2.3 From 163f080eb717d237f02d9a8c179b07ed31fdd6ad Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Fri, 2 Dec 2022 11:41:25 +0100 Subject: serial: core: Add option to output RS485 RX_DURING_TX state via GPIO This patch provides a generic GPIO variable for outputting the state of RS485 RX_DURING_TX. The GPIO is defined by the devicetree property "rs485-rx-during-tx-gpios". To use it in a low level serial driver, the evaluation of this variable must be implemented there accordingly. Signed-off-by: Christoph Niedermaier Link: https://lore.kernel.org/r/20221202104127.122761-2-cniedermaier@dh-electronics.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 026294c6ccb8..0b37a86bedc5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -579,6 +579,7 @@ struct uart_port { struct serial_rs485 rs485; struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ + struct gpio_desc *rs485_rx_during_tx_gpio; /* Output GPIO that sets the state of RS485 RX during TX */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; -- cgit v1.2.3 From b300fb26c59a749bf49559932fa8a85eb916b5a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:52 +0200 Subject: tty: Convert ->carrier_raised() and callchains to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return boolean from ->carrier_raised() instead of 0 and 1. Make the return type change also to tty_port_carrier_raised() that makes the ->carrier_raised() call (+ cd variable in moxa into which its return value is stored). Also cleans up a few unnecessary constructs related to this change: return xx ? 1 : 0; -> return xx; if (xx) return 1; return 0; -> return xx; Reviewed-by: Jiri Slaby Acked-by: Ulf Hansson # For MMC Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-7-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index fa3c3bdaa234..cf098459cb01 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -15,7 +15,7 @@ struct tty_struct; /** * struct tty_port_operations -- operations on tty_port - * @carrier_raised: return 1 if the carrier is raised on @port + * @carrier_raised: return true if the carrier is raised on @port * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device @@ -31,7 +31,7 @@ struct tty_struct; * the port itself. */ struct tty_port_operations { - int (*carrier_raised)(struct tty_port *port); + bool (*carrier_raised)(struct tty_port *port); void (*dtr_rts)(struct tty_port *port, int raise); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); @@ -230,7 +230,7 @@ static inline void tty_port_set_kopened(struct tty_port *port, bool val) struct tty_struct *tty_port_tty_get(struct tty_port *port); void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); -int tty_port_carrier_raised(struct tty_port *port); +bool tty_port_carrier_raised(struct tty_port *port); void tty_port_raise_dtr_rts(struct tty_port *port); void tty_port_lower_dtr_rts(struct tty_port *port); void tty_port_hangup(struct tty_port *port); -- cgit v1.2.3 From 5d420399073770134d2b03e004b2c0201c7fa26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:53 +0200 Subject: tty: Convert ->dtr_rts() to take bool argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the raise/on parameter in ->dtr_rts() to bool through the callchain. The parameter is used like bool. In USB serial, there remains a few implicit bool -> larger type conversions because some devices use u8 in their control messages. In moxa_tiocmget(), dtr variable was reused for line status which requires int so use a separate variable for status. Reviewed-by: Jiri Slaby Acked-by: Ulf Hansson # For MMC Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index cf098459cb01..c44e489de0ff 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -16,7 +16,7 @@ struct tty_struct; /** * struct tty_port_operations -- operations on tty_port * @carrier_raised: return true if the carrier is raised on @port - * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR + * @dtr_rts: raise the DTR line if @raise is true, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device * only. Called under the port mutex to serialize against @activate and @@ -32,7 +32,7 @@ struct tty_struct; */ struct tty_port_operations { bool (*carrier_raised)(struct tty_port *port); - void (*dtr_rts)(struct tty_port *port, int raise); + void (*dtr_rts)(struct tty_port *port, bool raise); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); void (*destruct)(struct tty_port *port); -- cgit v1.2.3 From 0388a152fc5544be82e736343496f99c4eef8d62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:54 +0200 Subject: tty/serial: Make ->dcd_change()+uart_handle_dcd_change() status bool active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert status parameter for ->dcd_change() and uart_handle_dcd_change() to bool which matches to how the parameter is used. Rename status to active to better describe what the parameter means. Acked-by: Rodolfo Giometti Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-9-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- include/linux/tty_ldisc.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 0b37a86bedc5..8c4187c4884a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -897,8 +897,7 @@ static inline bool uart_softcts_mode(struct uart_port *uport) * The following are helper functions for the low level drivers. */ -extern void uart_handle_dcd_change(struct uart_port *uport, - unsigned int status); +extern void uart_handle_dcd_change(struct uart_port *uport, bool active); extern void uart_handle_cts_change(struct uart_port *uport, unsigned int status); diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index dcb61ec11424..49dc172dedc7 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -170,7 +170,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * send, please arise a tasklet or workqueue to do the real data transfer. * Do not send data in this hook, it may lead to a deadlock. * - * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, unsigned int status)`` + * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, bool active)`` * * Tells the discipline that the DCD pin has changed its status. Used * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. @@ -238,7 +238,7 @@ struct tty_ldisc_ops { void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); void (*write_wakeup)(struct tty_struct *tty); - void (*dcd_change)(struct tty_struct *tty, unsigned int status); + void (*dcd_change)(struct tty_struct *tty, bool active); int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); void (*lookahead_buf)(struct tty_struct *tty, const unsigned char *cp, -- cgit v1.2.3 From 968d64578ec92968e8c79d766eb966efd1f68d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:55 +0200 Subject: serial: Make uart_handle_cts_change() status param bool active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert uart_handle_cts_change() to bool which is more appropriate than unsigned int. Rename status to active to better describe what the parameter means. While at it, make the comment about the active parameter easier to parse. Cleanup callsites from operations that are not necessary with bool. Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-10-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8c4187c4884a..9e3e5e0d11b2 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -898,8 +898,7 @@ static inline bool uart_softcts_mode(struct uart_port *uport) */ extern void uart_handle_dcd_change(struct uart_port *uport, bool active); -extern void uart_handle_cts_change(struct uart_port *uport, - unsigned int status); +extern void uart_handle_cts_change(struct uart_port *uport, bool active); extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); -- cgit v1.2.3 From 87f22db4c251ff92d588c2cc710031a59d5e4ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:56 +0200 Subject: tty: Return bool from tty_termios_hw_change() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change tty_termios_hw_change() to return bool. Reviewed-by: Jiri Slaby Reviewed-by: Johan Hovold Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-11-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 730c3301d710..093935e97f42 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -453,7 +453,7 @@ unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); -int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); +bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); void tty_wakeup(struct tty_struct *tty); -- cgit v1.2.3 From 5701cb8bf50e1c723553344b3f731b308da8ea21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:57 +0200 Subject: tty: Call ->dtr_rts() parameter active consistently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert various parameter names for ->dtr_rts() and related functions from onoff, on, and raise to active. Reviewed-by: Jiri Slaby Acked-by: Ulf Hansson # For MMC Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-12-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index c44e489de0ff..edf685a24f7c 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -16,7 +16,7 @@ struct tty_struct; /** * struct tty_port_operations -- operations on tty_port * @carrier_raised: return true if the carrier is raised on @port - * @dtr_rts: raise the DTR line if @raise is true, otherwise lower DTR + * @dtr_rts: raise the DTR line if @active is true, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device * only. Called under the port mutex to serialize against @activate and @@ -32,7 +32,7 @@ struct tty_struct; */ struct tty_port_operations { bool (*carrier_raised)(struct tty_port *port); - void (*dtr_rts)(struct tty_port *port, bool raise); + void (*dtr_rts)(struct tty_port *port, bool active); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); void (*destruct)(struct tty_port *port); -- cgit v1.2.3 From 4747ab89b4a652f835494fcf8342aaa0efb9b0fd Mon Sep 17 00:00:00 2001 From: Matthew Gerlach Date: Sun, 15 Jan 2023 07:14:46 -0800 Subject: fpga: dfl: add basic support for DFHv1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Version 1 of the Device Feature Header (DFH) definition adds functionality to the Device Feature List (DFL) bus. A DFHv1 header may have one or more parameter blocks that further describes the HW to SW. Add support to the DFL bus to parse the MSI-X parameter. The location of a feature's register set is explicitly described in DFHv1 and can be relative to the base of the DFHv1 or an absolute address. Parse the location and pass the information to DFL driver. Signed-off-by: Matthew Gerlach Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230115151447.1353428-4-matthew.gerlach@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/dfl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dfl.h b/include/linux/dfl.h index 431636a0dc78..0a7a00a0ee7f 100644 --- a/include/linux/dfl.h +++ b/include/linux/dfl.h @@ -27,11 +27,15 @@ enum dfl_id_type { * @id: id of the dfl device. * @type: type of DFL FIU of the device. See enum dfl_id_type. * @feature_id: feature identifier local to its DFL FIU type. + * @revision: revision of this dfl device feature. * @mmio_res: mmio resource of this dfl device. * @irqs: list of Linux IRQ numbers of this dfl device. * @num_irqs: number of IRQs supported by this dfl device. * @cdev: pointer to DFL FPGA container device this dfl device belongs to. * @id_entry: matched id entry in dfl driver's id table. + * @dfh_version: version of DFH for the device + * @param_size: size of the block parameters in bytes + * @params: pointer to block of parameters copied memory */ struct dfl_device { struct device dev; @@ -44,6 +48,9 @@ struct dfl_device { unsigned int num_irqs; struct dfl_fpga_cdev *cdev; const struct dfl_device_id *id_entry; + u8 dfh_version; + unsigned int param_size; + void *params; }; /** @@ -84,4 +91,5 @@ void dfl_driver_unregister(struct dfl_driver *dfl_drv); module_driver(__dfl_driver, dfl_driver_register, \ dfl_driver_unregister) +void *dfh_find_param(struct dfl_device *dfl_dev, int param_id, size_t *pcount); #endif /* __LINUX_DFL_H */ -- cgit v1.2.3 From d41e127757f37d0d47ccda4af2fe74c32533f798 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 18 Jan 2023 11:01:36 +0100 Subject: net: mdio: Rework scanning of bus ready for quirks Some C22 PHYs do bad things when there are C45 transactions on the bus. In order to handle this, the bus needs to be scanned first for C22 at all addresses, and then C45 scanned for all addresses. The Marvell pxa168 driver scans a specific address on the bus to find its PHY. This is a C22 only device, so update it to use the c22 helper. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Reviewed-by: Jesse Brandeburg Signed-off-by: Paolo Abeni --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b3cf1e08e880..fceaac0fb319 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -464,7 +464,7 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) } struct mii_bus *mdio_find_bus(const char *mdio_name); -struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); +struct phy_device *mdiobus_scan_c22(struct mii_bus *bus, int addr); #define PHY_INTERRUPT_DISABLED false #define PHY_INTERRUPT_ENABLED true -- cgit v1.2.3 From 3486593374858b41ae6ef7720cb28ff39ad822f3 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 18 Jan 2023 11:01:37 +0100 Subject: net: mdio: Add workaround for Micrel PHYs which are not C45 compatible After scanning the bus for C22 devices, check if any Micrel PHYs have been found. They are known to do bad things if there are C45 transactions on the bus. Prevent the scanning of the bus using C45 if such a PHY has been detected. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Signed-off-by: Paolo Abeni --- include/linux/micrel_phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 1f7c33b2f5a3..771e050883db 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -8,6 +8,8 @@ #ifndef _MICREL_PHY_H #define _MICREL_PHY_H +#define MICREL_OUI 0x0885 + #define MICREL_PHY_ID_MASK 0x00fffff0 #define PHY_ID_KSZ8873MLL 0x000e7237 -- cgit v1.2.3 From da099a7fb13db67678ecfa03f8ac5df8134c991b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 18 Jan 2023 11:01:40 +0100 Subject: net: phy: Remove probe_capabilities Deciding if to probe of PHYs using C45 is now determine by if the bus provides the C45 read method. This makes probe_capabilities redundant so remove it. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Reviewed-by: Jesse Brandeburg Acked-by: Andrew Jeffery Signed-off-by: Paolo Abeni --- include/linux/phy.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index fceaac0fb319..fbeba4fee8d4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -419,14 +419,6 @@ struct mii_bus { /** @reset_gpiod: Reset GPIO descriptor pointer */ struct gpio_desc *reset_gpiod; - /** @probe_capabilities: bus capabilities, used for probing */ - enum { - MDIOBUS_NO_CAP = 0, - MDIOBUS_C22, - MDIOBUS_C45, - MDIOBUS_C22_C45, - } probe_capabilities; - /** @shared_lock: protect access to the shared element */ struct mutex shared_lock; -- cgit v1.2.3 From ffc1e089725e3f8a15ddfdce283db42f7d0fa147 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 19 Jan 2023 16:19:15 +0100 Subject: VT: Add height parameter to con_font_get/set consw operations The current con_font_get/set API currently hardcodes a 32-pixel-tall limitation, which only dates from the old VGA hardware which could not handle taller fonts than that. This change just adds a vpitch parameter to release this constraint. Drivers which do not support vpitch != 32 can just return EINVAL when it is not 32, font loading tools will revert to trying 32 and succeed. This change makes the fbcon driver consider vpitch appropriately, thus making it able to load large fonts. Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20230119151934.932642243@ens-lyon.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9cea254b34b8..9e52de94a583 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -59,8 +59,9 @@ struct consw { int (*con_switch)(struct vc_data *vc); int (*con_blank)(struct vc_data *vc, int blank, int mode_switch); int (*con_font_set)(struct vc_data *vc, struct console_font *font, - unsigned int flags); - int (*con_font_get)(struct vc_data *vc, struct console_font *font); + unsigned int vpitch, unsigned int flags); + int (*con_font_get)(struct vc_data *vc, struct console_font *font, + unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, struct console_font *font, char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, -- cgit v1.2.3 From e610e81464e4e52645a0f5d259b8a9e3632db6ff Mon Sep 17 00:00:00 2001 From: "Garmin.Chang" Date: Fri, 23 Dec 2022 16:05:53 +0800 Subject: soc: mediatek: pm-domains: Add support for mt8188 Add domain control data including bus protection data size change due to more protection steps in mt8188. Signed-off-by: Garmin.Chang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221223080553.9397-3-Garmin.Chang@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 121 ++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 50804ac748bd..07f67b3d8e97 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -140,6 +140,127 @@ #define MT8192_TOP_AXI_PROT_EN_MM_2_MDP_2ND BIT(13) #define MT8192_TOP_AXI_PROT_EN_VDNR_CAM BIT(21) +#define MT8188_TOP_AXI_PROT_EN_SET 0x2A0 +#define MT8188_TOP_AXI_PROT_EN_CLR 0x2A4 +#define MT8188_TOP_AXI_PROT_EN_STA 0x228 +#define MT8188_TOP_AXI_PROT_EN_1_SET 0x2A8 +#define MT8188_TOP_AXI_PROT_EN_1_CLR 0x2AC +#define MT8188_TOP_AXI_PROT_EN_1_STA 0x258 +#define MT8188_TOP_AXI_PROT_EN_2_SET 0x714 +#define MT8188_TOP_AXI_PROT_EN_2_CLR 0x718 +#define MT8188_TOP_AXI_PROT_EN_2_STA 0x724 + +#define MT8188_TOP_AXI_PROT_EN_MM_SET 0x2D4 +#define MT8188_TOP_AXI_PROT_EN_MM_CLR 0x2D8 +#define MT8188_TOP_AXI_PROT_EN_MM_STA 0x2EC +#define MT8188_TOP_AXI_PROT_EN_MM_2_SET 0xDCC +#define MT8188_TOP_AXI_PROT_EN_MM_2_CLR 0xDD0 +#define MT8188_TOP_AXI_PROT_EN_MM_2_STA 0xDD8 + +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_SET 0xB84 +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_CLR 0xB88 +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_STA 0xB90 +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET 0xBCC +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR 0xBD0 +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA 0xBD8 + +#define MT8188_TOP_AXI_PROT_EN_MFG1_STEP1 BIT(11) +#define MT8188_TOP_AXI_PROT_EN_2_MFG1_STEP2 BIT(7) +#define MT8188_TOP_AXI_PROT_EN_1_MFG1_STEP3 BIT(19) +#define MT8188_TOP_AXI_PROT_EN_2_MFG1_STEP4 BIT(5) +#define MT8188_TOP_AXI_PROT_EN_MFG1_STEP5 GENMASK(22, 21) +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MFG1_STEP6 BIT(17) + +#define MT8188_TOP_AXI_PROT_EN_PEXTP_MAC_P0_STEP1 BIT(2) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_PEXTP_MAC_P0_STEP2 (BIT(8) | BIT(18) | BIT(30)) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_ETHER_STEP1 BIT(24) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_HDMI_TX_STEP1 BIT(20) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_AO_STEP1 GENMASK(31, 29) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_AO_STEP2 (GENMASK(4, 3) | BIT(28)) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_INFRA_STEP1 (GENMASK(16, 14) | BIT(23) | \ + BIT(27)) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_INFRA_STEP2 (GENMASK(19, 17) | GENMASK(26, 25)) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_STEP1 GENMASK(11, 8) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_STEP2 GENMASK(22, 21) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_STEP1 BIT(20) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_STEP2 BIT(12) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_ASRC_STEP1 BIT(24) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_ASRC_STEP2 BIT(13) + +#define MT8188_TOP_AXI_PROT_EN_VPPSYS0_STEP1 BIT(10) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VPPSYS0_STEP2 GENMASK(9, 8) +#define MT8188_TOP_AXI_PROT_EN_VPPSYS0_STEP3 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VPPSYS0_STEP4 (BIT(1) | BIT(4) | BIT(11)) +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VPPSYS0_STEP5 (BIT(20)) +#define MT8188_TOP_AXI_PROT_EN_MM_VDOSYS0_STEP1 (GENMASK(18, 17) | GENMASK(21, 20)) +#define MT8188_TOP_AXI_PROT_EN_VDOSYS0_STEP2 BIT(6) +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VDOSYS0_STEP3 BIT(21) +#define MT8188_TOP_AXI_PROT_EN_MM_VDOSYS1_STEP1 GENMASK(31, 30) +#define MT8188_TOP_AXI_PROT_EN_MM_VDOSYS1_STEP2 BIT(22) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VDOSYS1_STEP3 BIT(10) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_DP_TX_STEP1 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_EDP_TX_STEP1 BIT(22) + +#define MT8188_TOP_AXI_PROT_EN_MM_VPPSYS1_STEP1 GENMASK(6, 5) +#define MT8188_TOP_AXI_PROT_EN_MM_VPPSYS1_STEP2 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VPPSYS1_STEP3 BIT(18) +#define MT8188_TOP_AXI_PROT_EN_MM_2_WPE_STEP1 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_MM_2_WPE_STEP2 BIT(21) +#define MT8188_TOP_AXI_PROT_EN_MM_VDEC0_STEP1 BIT(13) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VDEC0_STEP2 BIT(13) +#define MT8188_TOP_AXI_PROT_EN_MM_VDEC1_STEP1 BIT(14) +#define MT8188_TOP_AXI_PROT_EN_MM_VDEC1_STEP2 BIT(29) +#define MT8188_TOP_AXI_PROT_EN_MM_VENC_STEP1 (BIT(9) | BIT(11)) +#define MT8188_TOP_AXI_PROT_EN_MM_VENC_STEP2 BIT(26) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VENC_STEP3 BIT(2) +#define MT8188_TOP_AXI_PROT_EN_MM_IMG_VCORE_STEP1 (BIT(1) | BIT(3)) +#define MT8188_TOP_AXI_PROT_EN_MM_IMG_VCORE_STEP2 BIT(25) +#define MT8188_TOP_AXI_PROT_EN_MM_2_IMG_VCORE_STEP3 BIT(16) +#define MT8188_TOP_AXI_PROT_EN_MM_2_IMG_MAIN_STEP1 GENMASK(27, 26) +#define MT8188_TOP_AXI_PROT_EN_MM_2_IMG_MAIN_STEP2 GENMASK(25, 24) +#define MT8188_TOP_AXI_PROT_EN_MM_CAM_VCORE_STEP1 (BIT(2) | BIT(4)) +#define MT8188_TOP_AXI_PROT_EN_2_CAM_VCORE_STEP2 BIT(0) +#define MT8188_TOP_AXI_PROT_EN_1_CAM_VCORE_STEP3 BIT(22) +#define MT8188_TOP_AXI_PROT_EN_MM_CAM_VCORE_STEP4 BIT(24) +#define MT8188_TOP_AXI_PROT_EN_MM_2_CAM_VCORE_STEP5 BIT(17) +#define MT8188_TOP_AXI_PROT_EN_MM_2_CAM_MAIN_STEP1 GENMASK(31, 30) +#define MT8188_TOP_AXI_PROT_EN_2_CAM_MAIN_STEP2 BIT(2) +#define MT8188_TOP_AXI_PROT_EN_MM_2_CAM_MAIN_STEP3 GENMASK(29, 28) +#define MT8188_TOP_AXI_PROT_EN_2_CAM_MAIN_STEP4 BIT(1) + +#define MT8188_SMI_COMMON_CLAMP_EN_STA 0x3C0 +#define MT8188_SMI_COMMON_CLAMP_EN_SET 0x3C4 +#define MT8188_SMI_COMMON_CLAMP_EN_CLR 0x3C8 + +#define MT8188_SMI_COMMON_SMI_CLAMP_DIP_TO_VDO0 GENMASK(3, 1) +#define MT8188_SMI_COMMON_SMI_CLAMP_DIP_TO_VPP1 GENMASK(2, 1) +#define MT8188_SMI_COMMON_SMI_CLAMP_IPE_TO_VPP1 BIT(0) + +#define MT8188_SMI_COMMON_SMI_CLAMP_CAM_SUBA_TO_VPP0 GENMASK(3, 2) +#define MT8188_SMI_COMMON_SMI_CLAMP_CAM_SUBB_TO_VDO0 GENMASK(3, 2) + +#define MT8188_SMI_LARB10_RESET_ADDR 0xC +#define MT8188_SMI_LARB11A_RESET_ADDR 0xC +#define MT8188_SMI_LARB11C_RESET_ADDR 0xC +#define MT8188_SMI_LARB12_RESET_ADDR 0xC +#define MT8188_SMI_LARB11B_RESET_ADDR 0xC +#define MT8188_SMI_LARB15_RESET_ADDR 0xC +#define MT8188_SMI_LARB16B_RESET_ADDR 0xA0 +#define MT8188_SMI_LARB17B_RESET_ADDR 0xA0 +#define MT8188_SMI_LARB16A_RESET_ADDR 0xA0 +#define MT8188_SMI_LARB17A_RESET_ADDR 0xA0 + +#define MT8188_SMI_LARB10_RESET BIT(0) +#define MT8188_SMI_LARB11A_RESET BIT(0) +#define MT8188_SMI_LARB11C_RESET BIT(0) +#define MT8188_SMI_LARB12_RESET BIT(8) +#define MT8188_SMI_LARB11B_RESET BIT(0) +#define MT8188_SMI_LARB15_RESET BIT(0) +#define MT8188_SMI_LARB16B_RESET BIT(4) +#define MT8188_SMI_LARB17B_RESET BIT(4) +#define MT8188_SMI_LARB16A_RESET BIT(4) +#define MT8188_SMI_LARB17A_RESET BIT(4) + #define MT8186_TOP_AXI_PROT_EN_SET (0x2A0) #define MT8186_TOP_AXI_PROT_EN_CLR (0x2A4) #define MT8186_TOP_AXI_PROT_EN_STA (0x228) -- cgit v1.2.3 From cecafc0a830f7ea89bc11c644e2841f603fcc4e6 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 5 Jan 2023 21:01:27 +0800 Subject: KVM: MMU: Make the definition of 'INVALID_GPA' common KVM already has a 'GPA_INVALID' defined as (~(gpa_t)0) in kvm_types.h, and it is used by ARM code. We do not need another definition of 'INVALID_GPA' for X86 specifically. Instead of using the common 'GPA_INVALID' for X86, replace it with 'INVALID_GPA', and change the users of 'GPA_INVALID' so that the diff can be smaller. Also because the name 'INVALID_GPA' tells the user we are using an invalid GPA, while the name 'GPA_INVALID' is emphasizing the GPA is an invalid one. No functional change intended. Signed-off-by: Yu Zhang Reviewed-by: Paul Durrant Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230105130127.866171-1-yu.c.zhang@linux.intel.com Signed-off-by: Oliver Upton --- include/linux/kvm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 76de36e56cdf..2728d49bbdf6 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,7 +40,7 @@ typedef unsigned long gva_t; typedef u64 gpa_t; typedef u64 gfn_t; -#define GPA_INVALID (~(gpa_t)0) +#define INVALID_GPA (~(gpa_t)0) typedef unsigned long hva_t; typedef u64 hpa_t; -- cgit v1.2.3 From 344da544f177f919cf6919e5abcd388f27aa53db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 16 Dec 2022 16:52:01 -0800 Subject: x86/nmi: Print reasons why backtrace NMIs are ignored Instrument nmi_trigger_cpumask_backtrace() to dump out diagnostics based on evidence accumulated by exc_nmi(). These diagnostics are dumped for CPUs that ignored an NMI backtrace request for more than 10 seconds. [ paulmck: Apply Ingo Molnar feedback. ] Signed-off-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Reviewed-by: Ingo Molnar --- include/linux/nmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f700ff2df074..048c0b9aa623 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -214,4 +214,12 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #include #endif +#ifdef CONFIG_NMI_CHECK_CPU +void nmi_backtrace_stall_snap(const struct cpumask *btp); +void nmi_backtrace_stall_check(const struct cpumask *btp); +#else +static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {} +static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {} +#endif + #endif -- cgit v1.2.3 From 07cc2c931e8e1083a31f4c51d2244fe264af63bf Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 16 Jan 2023 11:10:07 +0100 Subject: livepatch: Improve the search performance of module_kallsyms_on_each_symbol() Currently we traverse all symbols of all modules to find the specified function for the specified module. But in reality, we just need to find the given module and then traverse all the symbols in it. Let's add a new parameter 'const char *modname' to function module_kallsyms_on_each_symbol(), then we can compare the module names directly in this function and call hook 'fn' after matching. If 'modname' is NULL, the symbols of all modules are still traversed for compatibility with other usage cases. Phase1: mod1-->mod2..(subsequent modules do not need to be compared) | Phase2: -->f1-->f2-->f3 Assuming that there are m modules, each module has n symbols on average, then the time complexity is reduced from O(m * n) to O(m) + O(n). Reviewed-by: Petr Mladek Acked-by: Song Liu Signed-off-by: Zhen Lei Signed-off-by: Jiri Olsa Acked-by: Miroslav Benes Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230116101009.23694-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/module.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 8c5909c0076c..514bc81568c5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -879,11 +879,13 @@ static inline bool module_sig_ok(struct module *module) #endif /* CONFIG_MODULE_SIG */ #if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS) -int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, +int module_kallsyms_on_each_symbol(const char *modname, + int (*fn)(void *, const char *, struct module *, unsigned long), void *data); #else -static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, +static inline int module_kallsyms_on_each_symbol(const char *modname, + int (*fn)(void *, const char *, struct module *, unsigned long), void *data) { -- cgit v1.2.3 From 65a20c2eb96d698206846b27f9ac5ab05a24d569 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Jan 2023 16:21:07 +0100 Subject: net/mlx5: Remove MLX5E_LOCKED_FLOW flag The MLX5E_LOCKED_FLOW flag is not checked anywhere now so remove it entirely. Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b957b8f22a6b..44167760ff29 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -554,10 +554,6 @@ enum { * creation/deletion on drivers rescan. Unset during device attach. */ MLX5_PRIV_FLAGS_DETACH = 1 << 2, - /* Distinguish between mlx5e_probe/remove called by module init/cleanup - * and called by other flows which can already hold devlink lock - */ - MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW = 1 << 3, }; struct mlx5_adev { -- cgit v1.2.3 From 13e80244ca7e51d5eb7803f05e0579b11fc89048 Mon Sep 17 00:00:00 2001 From: Hanna Hawa Date: Wed, 28 Dec 2022 16:48:12 +0000 Subject: pinctrl: Add an API to get the pinctrl pins if initialized Add an API to get the pinctrl pins if it was initialized before driver probed. This API will be used in I2C core to get the device pinctrl information for recovery state change. Signed-off-by: Hanna Hawa Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- include/linux/pinctrl/devinfo.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h index 9e8b559e1253..bb6653af4f92 100644 --- a/include/linux/pinctrl/devinfo.h +++ b/include/linux/pinctrl/devinfo.h @@ -18,6 +18,8 @@ struct device; #ifdef CONFIG_PINCTRL +#include + /* The device core acts as a consumer toward pinctrl */ #include @@ -44,6 +46,14 @@ struct dev_pin_info { extern int pinctrl_bind_pins(struct device *dev); extern int pinctrl_init_done(struct device *dev); +static inline struct pinctrl *dev_pinctrl(struct device *dev) +{ + if (!dev->pins) + return NULL; + + return dev->pins->p; +} + #else /* Stubs if we're not using pinctrl */ @@ -58,5 +68,10 @@ static inline int pinctrl_init_done(struct device *dev) return 0; } +static inline struct pinctrl *dev_pinctrl(struct device *dev) +{ + return NULL; +} + #endif /* CONFIG_PINCTRL */ #endif /* PINCTRL_DEVINFO_H */ -- cgit v1.2.3 From d2c865553adb7ad9abf3894cbb209cf1bc95484a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2023 17:41:14 +0100 Subject: firmware: zynqmp: fix declarations for gcc-13 gcc-13.0.1 reports a type mismatch for two functions: drivers/firmware/xilinx/zynqmp.c:1228:5: error: conflicting types for 'zynqmp_pm_set_rpu_mode' due to enum/integer mismatch; have 'int(u32, enum rpu_oper_mode)' {aka 'int(unsigned int, enum rpu_oper_mode)'} [-Werror=enum-int-mismatch] 1228 | int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode) | ^~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/firmware/xilinx/zynqmp.c:25: include/linux/firmware/xlnx-zynqmp.h:552:5: note: previous declaration of 'zynqmp_pm_set_rpu_mode' with type 'int(u32, u32)' {aka 'int(unsigned int, unsigned int)'} 552 | int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1); | ^~~~~~~~~~~~~~~~~~~~~~ drivers/firmware/xilinx/zynqmp.c:1246:5: error: conflicting types for 'zynqmp_pm_set_tcm_config' due to enum/integer mismatch; have 'int(u32, enum rpu_tcm_comb)' {aka 'int(unsigned int, enum rpu_tcm_comb)'} [-Werror=enum-int-mismatch] 1246 | int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode) | ^~~~~~~~~~~~~~~~~~~~~~~~ include/linux/firmware/xlnx-zynqmp.h:553:5: note: previous declaration of 'zynqmp_pm_set_tcm_config' with type 'int(u32, u32)' {aka 'int(unsigned int, unsigned int)'} 553 | int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1); | ^~~~~~~~~~~~~~~~~~~~~~~~ Change the declaration in the header to match the function definition. Acked-by: Michal Simek Signed-off-by: Arnd Bergmann --- include/linux/firmware/xlnx-zynqmp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index b986e267d149..b09f443d3ab9 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -545,8 +545,8 @@ int zynqmp_pm_request_wake(const u32 node, const u64 address, const enum zynqmp_pm_request_ack ack); int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mode); -int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1); -int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1); +int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode); +int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode); int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value); int zynqmp_pm_set_gem_config(u32 node, enum pm_gem_config_type config, u32 value); @@ -845,12 +845,12 @@ static inline int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mo return -ENODEV; } -static inline int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1) +static inline int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode) { return -ENODEV; } -static inline int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1) +static inline int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode) { return -ENODEV; } -- cgit v1.2.3 From 0ddc052416a3a34700b8a16bad02d31369419553 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Sep 2022 15:20:02 +0200 Subject: ARM: pxa: remove irda leftover irda support was removed a long time ago, so stop registering the devices from the pxa machine. Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/platform_data/irda-pxaficp.h | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 include/linux/platform_data/irda-pxaficp.h (limited to 'include/linux') diff --git a/include/linux/platform_data/irda-pxaficp.h b/include/linux/platform_data/irda-pxaficp.h deleted file mode 100644 index bd35ddcf3068..000000000000 --- a/include/linux/platform_data/irda-pxaficp.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASMARM_ARCH_IRDA_H -#define ASMARM_ARCH_IRDA_H - -/* board specific transceiver capabilities */ - -#define IR_OFF 1 -#define IR_SIRMODE 2 -#define IR_FIRMODE 4 - -struct pxaficp_platform_data { - int transceiver_cap; - void (*transceiver_mode)(struct device *dev, int mode); - int (*startup)(struct device *dev); - void (*shutdown)(struct device *dev); - int gpio_pwdown; /* powerdown GPIO for the IrDA chip */ - bool gpio_pwdown_inverted; /* gpio_pwdown is inverted */ -}; - -extern void pxa_set_ficp_info(struct pxaficp_platform_data *info); - -#if defined(CONFIG_PXA25x) || defined(CONFIG_PXA27x) -void pxa2xx_transceiver_mode(struct device *dev, int mode); -#endif - -#endif -- cgit v1.2.3 From 8ca79aaad8becbda085e740c521a792f281c8a6d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 08:17:44 +0200 Subject: ARM: pxa: remove unused pxa3xx-ulpi This was only used by the cm-x300 board, which is now gone. Cc: Alan Stern Cc: linux-usb@vger.kernel.org Acked-by: Robert Jarzmik Acked-by: Greg Kroah-Hartman Signed-off-by: Arnd Bergmann --- include/linux/platform_data/usb-pxa3xx-ulpi.h | 32 --------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/linux/platform_data/usb-pxa3xx-ulpi.h (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-pxa3xx-ulpi.h b/include/linux/platform_data/usb-pxa3xx-ulpi.h deleted file mode 100644 index 4d31a5cbdeb1..000000000000 --- a/include/linux/platform_data/usb-pxa3xx-ulpi.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * PXA3xx U2D header - * - * Copyright (C) 2010 CompuLab Ltd. - * - * Igor Grinberg - */ -#ifndef __PXA310_U2D__ -#define __PXA310_U2D__ - -#include - -struct pxa3xx_u2d_platform_data { - -#define ULPI_SER_6PIN (1 << 0) -#define ULPI_SER_3PIN (1 << 1) - unsigned int ulpi_mode; - - int (*init)(struct device *); - void (*exit)(struct device *); -}; - - -/* Start PXA3xx U2D host */ -int pxa3xx_u2d_start_hc(struct usb_bus *host); -/* Stop PXA3xx U2D host */ -void pxa3xx_u2d_stop_hc(struct usb_bus *host); - -extern void pxa3xx_set_u2d_info(struct pxa3xx_u2d_platform_data *info); - -#endif /* __PXA310_U2D__ */ -- cgit v1.2.3 From 347651485af1f9111438fc12f9dbe2a79cc3c95b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 13:30:40 +0200 Subject: power: remove z2_battery driver The PXA z2 platform is gone, and this driver is now orphaned. Cc: linux-pm@vger.kernel.org Cc: Peter Edwards Acked-by: Sebastian Reichel Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/z2_battery.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/linux/z2_battery.h (limited to 'include/linux') diff --git a/include/linux/z2_battery.h b/include/linux/z2_battery.h deleted file mode 100644 index 9e8be7a7cd25..000000000000 --- a/include/linux/z2_battery.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_Z2_BATTERY_H -#define _LINUX_Z2_BATTERY_H - -struct z2_battery_info { - int batt_I2C_bus; - int batt_I2C_addr; - int batt_I2C_reg; - int min_voltage; - int max_voltage; - int batt_div; - int batt_mult; - int batt_tech; - char *batt_name; -}; - -#endif -- cgit v1.2.3 From e3e289fbc0b520cf469469e8cdba84a50424eb65 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 19 Nov 2022 07:48:15 +0000 Subject: uacce: supports device isolation feature UACCE adds the hardware error isolation feature. To improve service reliability, some uacce devices that frequently encounter hardware errors are isolated. Therefore, this feature is added. Users can configure the hardware error threshold by 'isolate_strategy' sysfs node. The user space can get the device isolated state by 'isolate' sysfs node. If the number of device errors exceeds the configured error threshold, the device will be isolated. It means the uacce device is unavailable. Signed-off-by: Kai Ye Link: https://lore.kernel.org/r/20221119074817.12063-2-yekai13@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uacce.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 9ce88c28b0a8..0a81c3dfd26c 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -8,6 +8,7 @@ #define UACCE_NAME "uacce" #define UACCE_MAX_REGION 2 #define UACCE_MAX_NAME_SIZE 64 +#define UACCE_MAX_ERR_THRESHOLD 65535 struct uacce_queue; struct uacce_device; @@ -30,6 +31,9 @@ struct uacce_qfile_region { * @is_q_updated: check whether the task is finished * @mmap: mmap addresses of queue to user space * @ioctl: ioctl for user space users of the queue + * @get_isolate_state: get the device state after set the isolate strategy + * @isolate_err_threshold_write: stored the isolate error threshold to the device + * @isolate_err_threshold_read: read the isolate error threshold value from the device */ struct uacce_ops { int (*get_available_instances)(struct uacce_device *uacce); @@ -43,6 +47,9 @@ struct uacce_ops { struct uacce_qfile_region *qfr); long (*ioctl)(struct uacce_queue *q, unsigned int cmd, unsigned long arg); + enum uacce_dev_state (*get_isolate_state)(struct uacce_device *uacce); + int (*isolate_err_threshold_write)(struct uacce_device *uacce, u32 num); + u32 (*isolate_err_threshold_read)(struct uacce_device *uacce); }; /** @@ -57,6 +64,11 @@ struct uacce_interface { const struct uacce_ops *ops; }; +enum uacce_dev_state { + UACCE_DEV_NORMAL, + UACCE_DEV_ISOLATE, +}; + enum uacce_q_state { UACCE_Q_ZOMBIE = 0, UACCE_Q_INIT, -- cgit v1.2.3 From cd0ac51c5760d4eed4981be5de9cad0255976512 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 19 Nov 2022 07:48:17 +0000 Subject: crypto: hisilicon/qm - define the device isolation strategy Define the device isolation strategy by the device driver. The user configures a hardware error threshold value by uacce interface. If the number of hardware errors exceeds the value of setting error threshold in one hour. The device will not be available in user space. The VF device use the PF device isolation strategy. All the hardware errors are processed by PF driver. Signed-off-by: Kai Ye Acked-by: Herbert Xu Link: https://lore.kernel.org/r/20221119074817.12063-4-yekai13@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/hisi_acc_qm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index be3aedaa96dc..d08bff0f87f9 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -272,6 +272,20 @@ struct hisi_qm_poll_data { u16 *qp_finish_id; }; +/** + * struct qm_err_isolate + * @isolate_lock: protects device error log + * @err_threshold: user config error threshold which triggers isolation + * @is_isolate: device isolation state + * @uacce_hw_errs: index into qm device error list + */ +struct qm_err_isolate { + struct mutex isolate_lock; + u32 err_threshold; + bool is_isolate; + struct list_head qm_hw_errs; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -341,6 +355,7 @@ struct hisi_qm { struct qm_shaper_factor *factor; u32 mb_qos; u32 type_rate; + struct qm_err_isolate isolate_data; }; struct hisi_qp_status { -- cgit v1.2.3 From b3c71626a9333b0b29f9921a39cef30b5961766f Mon Sep 17 00:00:00 2001 From: Mao Jinlong Date: Tue, 17 Jan 2023 06:57:01 -0800 Subject: Coresight: Add coresight TPDM source driver Add driver to support Coresight device TPDM (Trace, Profiling and Diagnostics Monitor). TPDM is a monitor to collect data from different datasets. This change is to add probe/enable/disable functions for tpdm source. Signed-off-by: Tao Zhang Signed-off-by: Mao Jinlong Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230120095301.30792-1-quic_jinlmao@quicinc.com --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index e241eb88dfb9..f19a47b9bb5a 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -61,6 +61,7 @@ enum coresight_dev_subtype_source { CORESIGHT_DEV_SUBTYPE_SOURCE_PROC, CORESIGHT_DEV_SUBTYPE_SOURCE_BUS, CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, + CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS, }; enum coresight_dev_subtype_helper { -- cgit v1.2.3 From 1766ac5248063c25d1fe46e04bb936c46313ed89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jan 2023 17:10:47 +0100 Subject: ASoC: ux500: remove platform_data support The platform data definition for ux500 sound devices was removed six years ago after the DT conversion was completed, see commit 4b483ed0be8b ("ARM: ux500: cut some platform data"). Remove some leftover bits in the ASoC driver and just assume that it always gets probed using DT. Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20230118161110.521504-3-arnd@kernel.org Signed-off-by: Mark Brown --- include/linux/platform_data/asoc-ux500-msp.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/platform_data/asoc-ux500-msp.h (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-ux500-msp.h b/include/linux/platform_data/asoc-ux500-msp.h deleted file mode 100644 index b8d0f730dda8..000000000000 --- a/include/linux/platform_data/asoc-ux500-msp.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Rabin Vincent for ST-Ericsson - */ - -#ifndef __MSP_H -#define __MSP_H - -#include - -/* Platform data structure for a MSP I2S-device */ -struct msp_i2s_platform_data { - int id; - struct stedma40_chan_cfg *msp_i2s_dma_rx; - struct stedma40_chan_cfg *msp_i2s_dma_tx; -}; - -#endif -- cgit v1.2.3 From 6640727fc55b7f0b561e927bc9c5b9d4c459fd8e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 8 Jan 2023 21:56:52 +0000 Subject: parport_pc: Let chipset drivers mask ECR bits on writes Provide an `ecr_writable' parameter to `__parport_pc_probe_port' so that callers can specify a mask of bits to modify on ECR writes. To avoid the need for separate bit set and bit clear masks always set bit 0 whenever a non-zero mask has been set, as all the currently known cases where a mask is required, that is Oxford Semiconductor devices, do require this bit to be set. If further cases are discovered where the bit is required to be clear, we can update code accordingly, but chances are very low as the bit is supposed to be read-only[1]. Skip ECR probing, which can be problematic as the Oxford Semiconductor OX12PCI840 part has been reported to lock up on setting bit 2, whenever a non-zero mask has been requested by a port subdriver, assuming that the ECR must be there if the subdriver has requested a specific way to access it. References: [1] "Extended Capabilities Port Protocol and ISA Interface Standard", Microsoft Corporation, Revision: 1.14, July 14, 1993, Table 14 "Extended Control Register" Signed-off-by: Maciej W. Rozycki Signed-off-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20230108215656.6433-3-sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/parport_pc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h index 3d6fc576d6a1..f1ec5c10c3b3 100644 --- a/include/linux/parport_pc.h +++ b/include/linux/parport_pc.h @@ -26,6 +26,9 @@ struct parport_pc_private { /* Whether or not there's an ECR. */ int ecr; + /* Bitmask of writable ECR bits. */ + unsigned char ecr_writable; + /* Number of PWords that FIFO will hold. */ int fifo_depth; -- cgit v1.2.3 From a86367803838b369fe5486ac18771d14723c258c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 18:23:07 +0800 Subject: drivers: base: transport_class: fix possible memory leak Current some drivers(like iscsi) call transport_register_device() failed, they don't call transport_destroy_device() to release the memory allocated in transport_setup_device(), because they don't know what was done, it should be internal thing to release the resource in register function. So fix this leak by calling destroy function inside register function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221110102307.3492557-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/transport_class.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h index 63076fb835e3..2efc271a96fa 100644 --- a/include/linux/transport_class.h +++ b/include/linux/transport_class.h @@ -70,8 +70,14 @@ void transport_destroy_device(struct device *); static inline int transport_register_device(struct device *dev) { + int ret; + transport_setup_device(dev); - return transport_add_device(dev); + ret = transport_add_device(dev); + if (ret) + transport_destroy_device(dev); + + return ret; } static inline void -- cgit v1.2.3 From 1b94ad7ccc21c002ca324d4fe6c11078bb4d4c9a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2023 21:11:00 +0200 Subject: ACPI: utils: Add acpi_evaluate_dsm_typed() and acpi_check_dsm() stubs When the ACPI part of a driver is optional the methods used in it are expected to be available even if CONFIG_ACPI=n. This is not the case for _DSM related methods. Add stubs for acpi_evaluate_dsm_typed() and acpi_check_dsm() methods. Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/acpi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5e6a876e17ba..4b12dad5a8a4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -950,6 +950,12 @@ static inline bool acpi_driver_match_device(struct device *dev, return false; } +static inline bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, + u64 rev, u64 funcs) +{ + return false; +} + static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, @@ -958,6 +964,15 @@ static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, return NULL; } +static inline union acpi_object *acpi_evaluate_dsm_typed(acpi_handle handle, + const guid_t *guid, + u64 rev, u64 func, + union acpi_object *argv4, + acpi_object_type type) +{ + return NULL; +} + static inline int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) { -- cgit v1.2.3 From f8064ab90d6644bc8338d2d7ff6a0d6e7a1b2ef3 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Jan 2023 05:52:33 +0530 Subject: bpf: Invalidate slices on destruction of dynptrs on stack The previous commit implemented destroy_if_dynptr_stack_slot. It destroys the dynptr which given spi belongs to, but still doesn't invalidate the slices that belong to such a dynptr. While for the case of referenced dynptr, we don't allow their overwrite and return an error early, we still allow it and destroy the dynptr for unreferenced dynptr. To be able to enable precise and scoped invalidation of dynptr slices in this case, we must be able to associate the source dynptr of slices that have been obtained using bpf_dynptr_data. When doing destruction, only slices belonging to the dynptr being destructed should be invalidated, and nothing else. Currently, dynptr slices belonging to different dynptrs are indistinguishible. Hence, allocate a unique id to each dynptr (CONST_PTR_TO_DYNPTR and those on stack). This will be stored as part of reg->id. Whenever using bpf_dynptr_data, transfer this unique dynptr id to the returned PTR_TO_MEM_OR_NULL slice pointer, and store it in a new per-PTR_TO_MEM dynptr_id register state member. Finally, after establishing such a relationship between dynptrs and their slices, implement precise invalidation logic that only invalidates slices belong to the destroyed dynptr in destroy_if_dynptr_stack_slot. Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230121002241.2113993-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 127058cfec47..aa83de1fe755 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -70,7 +70,10 @@ struct bpf_reg_state { u32 btf_id; }; - u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + struct { /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + u32 mem_size; + u32 dynptr_id; /* for dynptr slices */ + }; /* For dynptr stack slots */ struct { -- cgit v1.2.3 From 99d5fe9c7f3d0b349676984beda4bad109bf10b8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 19 Jan 2023 14:07:00 +0100 Subject: net: mdio: Remove support for building C45 muxed addresses The old way of performing a C45 bus transfer created a special register value and passed it to the MDIO bus driver, in the hope it would see the MII_ADDR_C45 bit set, and perform a C45 transfer. Now that there is a clear separation of C22 and C45, this scheme is no longer used. Remove all the #defines and helpers, to prevent any code being added which tries to use it. Signed-off-by: Andrew Lunn Signed-off-by: Michael Walle Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 220f3ca8702d..c0da30d63b1d 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -10,14 +10,6 @@ #include #include -/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit - * IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. - */ -#define MII_ADDR_C45 (1<<30) -#define MII_DEVADDR_C45_SHIFT 16 -#define MII_DEVADDR_C45_MASK GENMASK(20, 16) -#define MII_REGADDR_C45_MASK GENMASK(15, 0) - struct gpio_desc; struct mii_bus; struct reset_control; @@ -463,16 +455,6 @@ static inline int mdiodev_modify_changed(struct mdio_device *mdiodev, mask, set); } -static inline u16 mdiobus_c45_regad(u32 regnum) -{ - return FIELD_GET(MII_REGADDR_C45_MASK, regnum); -} - -static inline u16 mdiobus_c45_devad(u32 regnum) -{ - return FIELD_GET(MII_DEVADDR_C45_MASK, regnum); -} - static inline int mdiodev_c45_modify(struct mdio_device *mdiodev, int devad, u32 regnum, u16 mask, u16 set) { -- cgit v1.2.3 From 24a7fffb2533bb44d4ec84bb824732a9c4882f15 Mon Sep 17 00:00:00 2001 From: Nikhil Gupta Date: Fri, 20 Jan 2023 02:10:34 +0530 Subject: ptp_qoriq: fix latency in ptp_qoriq_adjtime() operation 1588 driver loses about 1us in adjtime operation at PTP slave This is because adjtime operation uses a slow non-atomic tmr_cnt_read() followed by tmr_cnt_write() operation. In the above sequence, since the timer counter operation keeps incrementing, it leads to latency. The tmr_offset register (which is added to TMR_CNT_H/L register giving the current time) must be programmed with the delta nanoseconds. Signed-off-by: Nikhil Gupta Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230119204034.7969-1-nikhil.gupta@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/fsl/ptp_qoriq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 01acebe37fab..b301bf7199d3 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -149,6 +149,7 @@ struct ptp_qoriq { struct device *dev; bool extts_fifo_support; bool fiper3_support; + bool etsec; int irq; int phc_index; u32 tclk_period; /* nanoseconds */ -- cgit v1.2.3 From aca1d27ac38a61d7db4b56418386992cb96b63f0 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Thu, 19 Jan 2023 14:03:57 -0500 Subject: efi: xen: Implement memory descriptor lookup based on hypercall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xen on x86 boots dom0 in EFI mode but without providing a memory map. This means that some consistency checks we would like to perform on configuration tables or other data structures in memory are not currently possible. Xen does, however, expose EFI memory descriptor info via a Xen hypercall, so let's wire that up instead. It turns out that the returned information is not identical to what Linux's efi_mem_desc_lookup would return: the address returned is the address passed to the hypercall, and the size returned is the number of bytes remaining in the configuration table. However, none of the callers of efi_mem_desc_lookup() currently care about this. In the future, Xen may gain a hypercall that returns the actual start address, which can be used instead. Co-developed-by: Ard Biesheuvel Signed-off-by: Ard Biesheuvel Signed-off-by: Demi Marie Obenour Tested-by: Marek Marczykowski-Górecki Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index f6b107da1cbc..4d7a44f60990 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -731,6 +731,7 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); +extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, -- cgit v1.2.3 From e699600232e0ca6237b996aa1a94a056cf776582 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 20 Jan 2023 18:03:43 +0900 Subject: firewire: cdev: obsolete NULL check to detect IEC 61883-1 FCP region In the character device, the listener to address space should distinguish whether the request is to IEC 61883-1 FCP region or not. The user space application needs to access to the object of request in enough later by read(2), while the core function releases the object of request in the FCP case after completing the callback to handler. The handler guarantees the access safe by some way. It's done by duplication of the object after NULL check to the request, since core function passes NULL in the FCP case. It's inconvenient since the object of request includes some helpful information. It's better to add another way to check whether the request is to FCP region or not. Conveniently the file of transaction layer includes local implementation for the purpose. This commit moves it to module local file and use it instead of the NULL check, then the result of check is stored to per-client data for the inbound transaction so that the result can be referred by later to release the data. Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20230120090344.296451-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- include/linux/firewire.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 980019053e54..56505436d159 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -278,9 +278,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, * Otherwise there is a danger of recursion of inbound and outbound * transactions from and to the local node. * - * The callback is responsible that either fw_send_response() or kfree() - * is called on the @request, except for FCP registers for which the core - * takes care of that. + * The callback is responsible that fw_send_response() is called on the @request, except for FCP + * registers for which the core takes care of that. */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, -- cgit v1.2.3 From c0fecaa44dc341d86e4ce96efcda9ea8b4c106af Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Thu, 19 Jan 2023 14:03:58 -0500 Subject: efi: Apply allowlist to EFI configuration tables when running under Xen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it turns out, Xen does not guarantee that EFI boot services data regions in memory are preserved, which means that EFI configuration tables pointing into such memory regions may be corrupted before the dom0 OS has had a chance to inspect them. This is causing problems for Qubes OS when it attempts to perform system firmware updates, which requires that the contents of the EFI System Resource Table are valid when the fwupd userspace program runs. However, other configuration tables such as the memory attributes table or the runtime properties table are equally affected, and so we need a comprehensive workaround that works for any table type. So when running under Xen, check the EFI memory descriptor covering the start of the table, and disregard the table if it does not reside in memory that is preserved by Xen. Co-developed-by: Ard Biesheuvel Signed-off-by: Ard Biesheuvel Signed-off-by: Demi Marie Obenour Tested-by: Marek Marczykowski-Górecki Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4d7a44f60990..1a1adc8d3ba3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1322,4 +1322,14 @@ struct linux_efi_initrd { /* Header of a populated EFI secret area */ #define EFI_SECRET_TABLE_HEADER_GUID EFI_GUID(0x1e74f542, 0x71dd, 0x4d66, 0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b) +bool xen_efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table); + +static inline +bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) +{ + if (!IS_ENABLED(CONFIG_XEN_EFI)) + return true; + return xen_efi_config_table_is_usable(guid, table); +} + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 43651e60aa167974955b23f73a25eb79886ab6d0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 16 Jan 2023 10:47:35 +0100 Subject: mtd: rawnand: Fix nand_chip kdoc Describe the continuous read nand_chip fields to avoid the following htmldocs warning: include/linux/mtd/rawnand.h:1325: warning: Function parameter or member 'cont_read' not described in 'nand_chip' Reported-by: Stephen Rothwell Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230116094735.11483-1-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 1b0936fe3c6e..f8d4be9c587a 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1260,6 +1260,10 @@ struct nand_secure_region { * @read_retries: The number of read retry modes supported * @secure_regions: Structure containing the secure regions info * @nr_secure_regions: Number of secure regions + * @cont_read: Sequential page read internals + * @cont_read.ongoing: Whether a continuous read is ongoing or not + * @cont_read.first_page: Start of the continuous read operation + * @cont_read.last_page: End of the continuous read operation * @controller: The hardware controller structure which is shared among multiple * independent devices * @ecc: The ECC controller structure -- cgit v1.2.3 From 2b30f8291a30305eeedcd787b4d0e352410f7268 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Jan 2023 14:26:54 +0200 Subject: net: ethtool: add support for MAC Merge layer The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2 specifications (the other being Frame Preemption; IEEE 802.1Q-2018 clause 6.7.2), which work together to minimize latency caused by frame interference at TX. The overall goal of TSN is for normal traffic and traffic with a bounded deadline to be able to cohabitate on the same L2 network and not bother each other too much. The standards achieve this (partly) by introducing the concept of preemptible traffic, i.e. Ethernet frames that have a custom value for the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented and reassembled at L2 on a link-local basis. The non-preemptible frames are called express traffic, they are transmitted using a normal SFD, and they can preempt preemptible frames, therefore having lower latency, which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo frames around 9K). Preemption is not recursive, i.e. a P frame cannot preempt another P frame. Preemption also does not depend upon priority, or otherwise said, an E frame with prio 0 will still preempt a P frame with prio 7. In terms of implementation, the standards talk about the presence of an express MAC (eMAC) which handles express traffic, and a preemptible MAC (pMAC) which handles preemptible traffic, and these MACs are multiplexed on the same MII by a MAC merge layer. To support frame preemption, the definition of the SFD was generalized to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an Ethernet frame fragment, or a complete frame. Stations unaware of an SMD value different from the standard SFD will treat P frames as error frames. To prevent that from happening, a negotiation process is defined. On RX, packets are dispatched to the eMAC or pMAC after being filtered by their SMD. On TX, the eMAC/pMAC classification decision is taken by the 802.1Q spec, based on packet priority (each of the 8 user priority values may have an admin-status of preemptible or express). The MAC Merge layer and the Frame Preemption parameters have some degree of independence in terms of how software stacks are supposed to deal with them. The activation of the MM layer is supposed to be controlled by an LLDP daemon (after it has been communicated that the link partner also supports it), after which a (hardware-based or not) verification handshake takes place, before actually enabling the feature. So the process is intended to be relatively plug-and-play. Whereas FP settings are supposed to be coordinated across a network using something approximating NETCONF. The support contained here is exclusively for the 802.3 (MAC Merge) portions and not for the 802.1Q (Frame Preemption) parts. This API is sufficient for an LLDP daemon to do its job. The FP adminStatus variable from 802.1Q is outside the scope of an LLDP daemon. I have taken a few creative licenses and augmented the Linux kernel UAPI compared to the standard managed objects recommended by IEEE 802.3. These are: - ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive Processing state diagram, a MAC Merge layer is always supposed to be able to receive P frames. However, this implies keeping the pMAC powered on, which will consume needless power in applications where FP will never be used. If LLDP is used, the reception of an Additional Ethernet Capabilities TLV from the link partner is sufficient indication that the pMAC should be enabled. So my proposal is that in Linux, we keep the pMAC turned off by default and that user space turns it on when needed. - ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in the boolean netlink attributes offered, so this is also positive here. Other than the meaning being reversed, they correspond to the same thing. - ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for a LLDP daemon to maximize the verifyTime variable (delay between SMD-V transmissions), to maximize its chances that the LP replies. IEEE says that the verifyTime can range between 1 and 128 ms, but the NXP ENETC stupidly keeps this variable in a 7 bit register, so the maximum supported value is 127 ms. I could have chosen to hardcode this in the LLDP daemon to a lower value, but why not let the kernel expose its supported range directly. - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called aMACMergeAddFragSize, and expresses the "additional" fragment size (on top of ETH_ZLEN), whereas this expresses the absolute value of the fragment size. - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed object mandated by the standard, but user space clearly needs to know what is the minimum supported fragment size of our local receiver, since LLDP must advertise a value no lower than that. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/ethtool.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 20d197693d34..37eba38da502 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -477,6 +477,98 @@ struct ethtool_module_power_mode_params { enum ethtool_module_power_mode mode; }; +/** + * struct ethtool_mm_state - 802.3 MAC merge layer state + * @verify_time: + * wait time between verification attempts in ms (according to clause + * 30.14.1.6 aMACMergeVerifyTime) + * @max_verify_time: + * maximum accepted value for the @verify_time variable in set requests + * @verify_status: + * state of the verification state machine of the MM layer (according to + * clause 30.14.1.2 aMACMergeStatusVerify) + * @tx_enabled: + * set if the MM layer is administratively enabled in the TX direction + * (according to clause 30.14.1.3 aMACMergeEnableTx) + * @tx_active: + * set if the MM layer is enabled in the TX direction, which makes FP + * possible (according to 30.14.1.5 aMACMergeStatusTx). This should be + * true if MM is enabled, and the verification status is either verified, + * or disabled. + * @pmac_enabled: + * set if the preemptible MAC is powered on and is able to receive + * preemptible packets and respond to verification frames. + * @verify_enabled: + * set if the Verify function of the MM layer (which sends SMD-V + * verification requests) is administratively enabled (regardless of + * whether it is currently in the ETHTOOL_MM_VERIFY_STATUS_DISABLED state + * or not), according to clause 30.14.1.4 aMACMergeVerifyDisableTx (but + * using positive rather than negative logic). The device should always + * respond to received SMD-V requests as long as @pmac_enabled is set. + * @tx_min_frag_size: + * the minimum size of non-final mPacket fragments that the link partner + * supports receiving, expressed in octets. Compared to the definition + * from clause 30.14.1.7 aMACMergeAddFragSize which is expressed in the + * range 0 to 3 (requiring a translation to the size in octets according + * to the formula 64 * (1 + addFragSize) - 4), a value in a continuous and + * unbounded range can be specified here. + * @rx_min_frag_size: + * the minimum size of non-final mPacket fragments that this device + * supports receiving, expressed in octets. + */ +struct ethtool_mm_state { + u32 verify_time; + u32 max_verify_time; + enum ethtool_mm_verify_status verify_status; + bool tx_enabled; + bool tx_active; + bool pmac_enabled; + bool verify_enabled; + u32 tx_min_frag_size; + u32 rx_min_frag_size; +}; + +/** + * struct ethtool_mm_cfg - 802.3 MAC merge layer configuration + * @verify_time: see struct ethtool_mm_state + * @verify_enabled: see struct ethtool_mm_state + * @tx_enabled: see struct ethtool_mm_state + * @pmac_enabled: see struct ethtool_mm_state + * @tx_min_frag_size: see struct ethtool_mm_state + */ +struct ethtool_mm_cfg { + u32 verify_time; + bool verify_enabled; + bool tx_enabled; + bool pmac_enabled; + u32 tx_min_frag_size; +}; + +/** + * struct ethtool_mm_stats - 802.3 MAC merge layer statistics + * @MACMergeFrameAssErrorCount: + * received MAC frames with reassembly errors + * @MACMergeFrameSmdErrorCount: + * received MAC frames/fragments rejected due to unknown or incorrect SMD + * @MACMergeFrameAssOkCount: + * received MAC frames that were successfully reassembled and passed up + * @MACMergeFragCountRx: + * number of additional correct SMD-C mPackets received due to preemption + * @MACMergeFragCountTx: + * number of additional mPackets sent due to preemption + * @MACMergeHoldCount: + * number of times the MM layer entered the HOLD state, which blocks + * transmission of preemptible traffic + */ +struct ethtool_mm_stats { + u64 MACMergeFrameAssErrorCount; + u64 MACMergeFrameSmdErrorCount; + u64 MACMergeFrameAssOkCount; + u64 MACMergeFragCountRx; + u64 MACMergeFragCountTx; + u64 MACMergeHoldCount; +}; + /** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes @@ -649,6 +741,9 @@ struct ethtool_module_power_mode_params { * plugged-in. * @set_module_power_mode: Set the power mode policy for the plug-in module * used by the network device. + * @get_mm: Query the 802.3 MAC Merge layer state. + * @set_mm: Set the 802.3 MAC Merge layer parameters. + * @get_mm_stats: Query the 802.3 MAC Merge layer statistics. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -787,6 +882,10 @@ struct ethtool_ops { int (*set_module_power_mode)(struct net_device *dev, const struct ethtool_module_power_mode_params *params, struct netlink_ext_ack *extack); + int (*get_mm)(struct net_device *dev, struct ethtool_mm_state *state); + int (*set_mm)(struct net_device *dev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack); + void (*get_mm_stats)(struct net_device *dev, struct ethtool_mm_stats *stats); }; int ethtool_check_ops(const struct ethtool_ops *ops); -- cgit v1.2.3 From 04692c9020b76939715d6f2b4ff84d832246e0fc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Jan 2023 14:26:56 +0200 Subject: net: ethtool: netlink: retrieve stats from multiple sources (eMAC, pMAC) IEEE 802.3-2018 clause 99 defines a MAC Merge sublayer which contains an Express MAC and a Preemptible MAC. Both MACs are hidden to higher and lower layers and visible as a single MAC (packet classification to eMAC or pMAC on TX is done based on priority; classification on RX is done based on SFD). For devices which support a MAC Merge sublayer, it is desirable to retrieve individual packet counters from the eMAC and the pMAC, as well as aggregate statistics (their sum). Introduce a new ETHTOOL_A_STATS_SRC attribute which is part of the policy of ETHTOOL_MSG_STATS_GET and, and an ETHTOOL_A_PAUSE_STATS_SRC which is part of the policy of ETHTOOL_MSG_PAUSE_GET (accepted when ETHTOOL_FLAG_STATS is set in the common ethtool header). Both of these take values from enum ethtool_mac_stats_src, defaulting to "aggregate" in the absence of the attribute. Existing drivers do not need to pay attention to this enum which was added to all driver-facing structures, just the ones which report the MAC merge layer as supported. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/ethtool.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 37eba38da502..0ccba6612190 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -311,6 +311,7 @@ static inline void ethtool_stats_init(u64 *stats, unsigned int n) * via a more targeted API. */ struct ethtool_eth_mac_stats { + enum ethtool_mac_stats_src src; u64 FramesTransmittedOK; u64 SingleCollisionFrames; u64 MultipleCollisionFrames; @@ -339,6 +340,7 @@ struct ethtool_eth_mac_stats { * via a more targeted API. */ struct ethtool_eth_phy_stats { + enum ethtool_mac_stats_src src; u64 SymbolErrorDuringCarrier; }; @@ -346,6 +348,7 @@ struct ethtool_eth_phy_stats { * via a more targeted API. */ struct ethtool_eth_ctrl_stats { + enum ethtool_mac_stats_src src; u64 MACControlFramesTransmitted; u64 MACControlFramesReceived; u64 UnsupportedOpcodesReceived; @@ -353,6 +356,8 @@ struct ethtool_eth_ctrl_stats { /** * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames + * @src: input field denoting whether stats should be queried from the eMAC or + * pMAC (if the MM layer is supported). To be ignored otherwise. * @tx_pause_frames: transmitted pause frame count. Reported to user space * as %ETHTOOL_A_PAUSE_STAT_TX_FRAMES. * @@ -366,6 +371,7 @@ struct ethtool_eth_ctrl_stats { * from the standard. */ struct ethtool_pause_stats { + enum ethtool_mac_stats_src src; u64 tx_pause_frames; u64 rx_pause_frames; }; @@ -417,6 +423,8 @@ struct ethtool_rmon_hist_range { /** * struct ethtool_rmon_stats - selected RMON (RFC 2819) statistics + * @src: input field denoting whether stats should be queried from the eMAC or + * pMAC (if the MM layer is supported). To be ignored otherwise. * @undersize_pkts: Equivalent to `etherStatsUndersizePkts` from the RFC. * @oversize_pkts: Equivalent to `etherStatsOversizePkts` from the RFC. * @fragments: Equivalent to `etherStatsFragments` from the RFC. @@ -432,6 +440,7 @@ struct ethtool_rmon_hist_range { * ranges is left to the driver. */ struct ethtool_rmon_stats { + enum ethtool_mac_stats_src src; u64 undersize_pkts; u64 oversize_pkts; u64 fragments; -- cgit v1.2.3 From 449c5459641ad72a504884abb9fb9b19ee31397b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Jan 2023 14:26:58 +0200 Subject: net: ethtool: add helpers for aggregate statistics When a pMAC exists but the driver is unable to atomically query the aggregate eMAC+pMAC statistics, the user should be given back at least the sum of eMAC and pMAC counters queried separately. This is a generic problem, so add helpers in ethtool to do this operation, if the driver doesn't have a better way to report aggregate stats. Do this in a way that does not require changes to these functions when new stats are added (basically treat the structures as an array of u64 values, except for the first element which is the stats source). In include/linux/ethtool.h, there is already a section where helper function prototypes should be placed. The trouble is, this section is too early, before the definitions of struct ethtool_eth_mac_stats et.al. Move that section at the end and append these new helpers to it. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/ethtool.h | 100 +++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0ccba6612190..6746dee5a3fd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -106,11 +106,6 @@ enum ethtool_supported_ring_param { struct net_device; struct netlink_ext_ack; -/* Some generic methods drivers may use in their ethtool_ops */ -u32 ethtool_op_get_link(struct net_device *dev); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); - - /* Link extended state and substate. */ struct ethtool_link_ext_state_info { enum ethtool_link_ext_state link_ext_state; @@ -312,28 +307,30 @@ static inline void ethtool_stats_init(u64 *stats, unsigned int n) */ struct ethtool_eth_mac_stats { enum ethtool_mac_stats_src src; - u64 FramesTransmittedOK; - u64 SingleCollisionFrames; - u64 MultipleCollisionFrames; - u64 FramesReceivedOK; - u64 FrameCheckSequenceErrors; - u64 AlignmentErrors; - u64 OctetsTransmittedOK; - u64 FramesWithDeferredXmissions; - u64 LateCollisions; - u64 FramesAbortedDueToXSColls; - u64 FramesLostDueToIntMACXmitError; - u64 CarrierSenseErrors; - u64 OctetsReceivedOK; - u64 FramesLostDueToIntMACRcvError; - u64 MulticastFramesXmittedOK; - u64 BroadcastFramesXmittedOK; - u64 FramesWithExcessiveDeferral; - u64 MulticastFramesReceivedOK; - u64 BroadcastFramesReceivedOK; - u64 InRangeLengthErrors; - u64 OutOfRangeLengthField; - u64 FrameTooLongErrors; + struct_group(stats, + u64 FramesTransmittedOK; + u64 SingleCollisionFrames; + u64 MultipleCollisionFrames; + u64 FramesReceivedOK; + u64 FrameCheckSequenceErrors; + u64 AlignmentErrors; + u64 OctetsTransmittedOK; + u64 FramesWithDeferredXmissions; + u64 LateCollisions; + u64 FramesAbortedDueToXSColls; + u64 FramesLostDueToIntMACXmitError; + u64 CarrierSenseErrors; + u64 OctetsReceivedOK; + u64 FramesLostDueToIntMACRcvError; + u64 MulticastFramesXmittedOK; + u64 BroadcastFramesXmittedOK; + u64 FramesWithExcessiveDeferral; + u64 MulticastFramesReceivedOK; + u64 BroadcastFramesReceivedOK; + u64 InRangeLengthErrors; + u64 OutOfRangeLengthField; + u64 FrameTooLongErrors; + ); }; /* Basic IEEE 802.3 PHY statistics (30.3.2.1.*), not otherwise exposed @@ -341,7 +338,9 @@ struct ethtool_eth_mac_stats { */ struct ethtool_eth_phy_stats { enum ethtool_mac_stats_src src; - u64 SymbolErrorDuringCarrier; + struct_group(stats, + u64 SymbolErrorDuringCarrier; + ); }; /* Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*), not otherwise exposed @@ -349,9 +348,11 @@ struct ethtool_eth_phy_stats { */ struct ethtool_eth_ctrl_stats { enum ethtool_mac_stats_src src; - u64 MACControlFramesTransmitted; - u64 MACControlFramesReceived; - u64 UnsupportedOpcodesReceived; + struct_group(stats, + u64 MACControlFramesTransmitted; + u64 MACControlFramesReceived; + u64 UnsupportedOpcodesReceived; + ); }; /** @@ -372,8 +373,10 @@ struct ethtool_eth_ctrl_stats { */ struct ethtool_pause_stats { enum ethtool_mac_stats_src src; - u64 tx_pause_frames; - u64 rx_pause_frames; + struct_group(stats, + u64 tx_pause_frames; + u64 rx_pause_frames; + ); }; #define ETHTOOL_MAX_LANES 8 @@ -441,13 +444,15 @@ struct ethtool_rmon_hist_range { */ struct ethtool_rmon_stats { enum ethtool_mac_stats_src src; - u64 undersize_pkts; - u64 oversize_pkts; - u64 fragments; - u64 jabbers; - - u64 hist[ETHTOOL_RMON_HIST_MAX]; - u64 hist_tx[ETHTOOL_RMON_HIST_MAX]; + struct_group(stats, + u64 undersize_pkts; + u64 oversize_pkts; + u64 fragments; + u64 jabbers; + + u64 hist[ETHTOOL_RMON_HIST_MAX]; + u64 hist_tx[ETHTOOL_RMON_HIST_MAX]; + ); }; #define ETH_MODULE_EEPROM_PAGE_LEN 128 @@ -981,6 +986,21 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, */ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); +/* Some generic methods drivers may use in their ethtool_ops */ +u32 ethtool_op_get_link(struct net_device *dev); +int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); + +void ethtool_aggregate_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats); +void ethtool_aggregate_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats); +void ethtool_aggregate_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats); +void ethtool_aggregate_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats); +void ethtool_aggregate_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats); + /** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to start of string to update -- cgit v1.2.3 From dd1c41645039d3820a7dde40e11f601c56240c40 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Jan 2023 14:26:59 +0200 Subject: net: ethtool: add helpers for MM fragment size translation We deliberately make the Linux UAPI pass the minimum fragment size in octets, even though IEEE 802.3 defines it as discrete values, and addFragSize is just the multiplier. This is because there is nothing impossible in operating with an in-between value for the fragment size of non-final preempted fragments, and there may even appear hardware which supports the in-between sizes. For the hardware which just understands the addFragSize multiplier, create two helpers which translate back and forth the values passed in octets. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/ethtool.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6746dee5a3fd..6a8253d3fea8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -1001,6 +1002,47 @@ void ethtool_aggregate_pause_stats(struct net_device *dev, void ethtool_aggregate_rmon_stats(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats); +/** + * ethtool_mm_frag_size_add_to_min - Translate (standard) additional fragment + * size expressed as multiplier into (absolute) minimum fragment size + * value expressed in octets + * @val_add: Value of addFragSize multiplier + */ +static inline u32 ethtool_mm_frag_size_add_to_min(u32 val_add) +{ + return (ETH_ZLEN + ETH_FCS_LEN) * (1 + val_add) - ETH_FCS_LEN; +} + +/** + * ethtool_mm_frag_size_min_to_add - Translate (absolute) minimum fragment size + * expressed in octets into (standard) additional fragment size expressed + * as multiplier + * @val_min: Value of addFragSize variable in octets + * @val_add: Pointer where the standard addFragSize value is to be returned + * @extack: Netlink extended ack + * + * Translate a value in octets to one of 0, 1, 2, 3 according to the reverse + * application of the 802.3 formula 64 * (1 + addFragSize) - 4. To be called + * by drivers which do not support programming the minimum fragment size to a + * continuous range. Returns error on other fragment length values. + */ +static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, + struct netlink_ext_ack *extack) +{ + u32 add_frag_size; + + for (add_frag_size = 0; add_frag_size < 4; add_frag_size++) { + if (ethtool_mm_frag_size_add_to_min(add_frag_size) == val_min) { + *val_add = add_frag_size; + return 0; + } + } + + NL_SET_ERR_MSG_MOD(extack, + "minFragSize required to be one of 60, 124, 188 or 252"); + return -EINVAL; +} + /** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to start of string to update -- cgit v1.2.3 From 8681e3663411ee6b64dd0714b23f5c86f64f7533 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:31:02 +0100 Subject: drm/simpledrm: Support the XB24/AB24 format Add XB24 and AB24 to the list of supported formats. The format helpers support conversion to these formats and they are documented in the simple-framebuffer device tree bindings. Reviewed-by: Thomas Zimmermann Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-8-thierry.reding@gmail.com --- include/linux/platform_data/simplefb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/simplefb.h b/include/linux/platform_data/simplefb.h index 27ea99af6e1d..4f94d52ac99f 100644 --- a/include/linux/platform_data/simplefb.h +++ b/include/linux/platform_data/simplefb.h @@ -22,6 +22,7 @@ { "r8g8b8", 24, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_RGB888 }, \ { "x8r8g8b8", 32, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_XRGB8888 }, \ { "a8r8g8b8", 32, {16, 8}, {8, 8}, {0, 8}, {24, 8}, DRM_FORMAT_ARGB8888 }, \ + { "x8b8g8r8", 32, {0, 8}, {8, 8}, {16, 8}, {0, 0}, DRM_FORMAT_XBGR8888 }, \ { "a8b8g8r8", 32, {0, 8}, {8, 8}, {16, 8}, {24, 8}, DRM_FORMAT_ABGR8888 }, \ { "x2r10g10b10", 32, {20, 10}, {10, 10}, {0, 10}, {0, 0}, DRM_FORMAT_XRGB2101010 }, \ { "a2r10g10b10", 32, {20, 10}, {10, 10}, {0, 10}, {30, 2}, DRM_FORMAT_ARGB2101010 }, \ -- cgit v1.2.3 From 92bd5a1390335bb3cc76bdf1b4356edbc94d408d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 13 Jan 2023 09:20:37 -0600 Subject: x86/resctrl: Add interface to write mbm_total_bytes_config The event configuration for mbm_total_bytes can be changed by the user by writing to the file /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config. The event configuration settings are domain specific and affect all the CPUs in the domain. Following are the types of events supported: ==== =========================================================== Bits Description ==== =========================================================== 6 Dirty Victims from the QOS domain to all types of memory 5 Reads to slow memory in the non-local NUMA domain 4 Reads to slow memory in the local NUMA domain 3 Non-temporal writes to non-local NUMA domain 2 Non-temporal writes to local NUMA domain 1 Reads to memory in the non-local NUMA domain 0 Reads to memory in the local NUMA domain ==== =========================================================== For example: To change the mbm_total_bytes to count only reads on domain 0, the bits 0, 1, 4 and 5 needs to be set, which is 110011b (in hex 0x33). Run the command: $echo 0=0x33 > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config To change the mbm_total_bytes to count all the slow memory reads on domain 1, the bits 4 and 5 needs to be set which is 110000b (in hex 0x30). Run the command: $echo 1=0x30 > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20230113152039.770054-12-babu.moger@amd.com --- include/linux/resctrl.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 0cee154abc9f..8334eeacfec5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -250,6 +250,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, u32 rmid, enum resctrl_event_id eventid); +/** + * resctrl_arch_reset_rmid_all() - Reset all private state associated with + * all rmids and eventids. + * @r: The resctrl resource. + * @d: The domain for which all architectural counter state will + * be cleared. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- cgit v1.2.3 From 9d03ebc71a027ca495c60f6e94d3cda81921791f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 19 Jan 2023 14:15:21 -0800 Subject: bpf: Rename bpf_{prog,map}_is_dev_bound to is_offloaded BPF offloading infra will be reused to implement bound-but-not-offloaded bpf programs. Rename existing helpers for clarity. No functional changes. Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Reviewed-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-3-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ae7771c7d750..1bb525c0130e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2481,12 +2481,12 @@ void unpriv_ebpf_notify(int new_state); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) { return aux->offload_requested; } -static inline bool bpf_map_is_dev_bound(struct bpf_map *map) +static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return unlikely(map->ops == &bpf_map_offload_ops); } @@ -2513,12 +2513,12 @@ static inline int bpf_prog_offload_init(struct bpf_prog *prog, return -EOPNOTSUPP; } -static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) { return false; } -static inline bool bpf_map_is_dev_bound(struct bpf_map *map) +static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return false; } -- cgit v1.2.3 From 2b3486bc2d237ec345b3942b7be5deabf8c8fed1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 19 Jan 2023 14:15:24 -0800 Subject: bpf: Introduce device-bound XDP programs New flag BPF_F_XDP_DEV_BOUND_ONLY plus all the infra to have a way to associate a netdev with a BPF program at load time. netdevsim checks are dropped in favor of generic check in dev_xdp_attach. Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Jakub Kicinski Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-6-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bb525c0130e..b97a05bb47be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1261,7 +1261,8 @@ struct bpf_prog_aux { enum bpf_prog_type saved_dst_prog_type; enum bpf_attach_type saved_dst_attach_type; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ - bool offload_requested; + bool dev_bound; /* Program is bound to the netdev. */ + bool offload_requested; /* Program is bound and offloaded to the netdev. */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; bool sleepable; @@ -2451,7 +2452,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); int bpf_prog_offload_compile(struct bpf_prog *prog); -void bpf_prog_offload_destroy(struct bpf_prog *prog); +void bpf_prog_dev_bound_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog); @@ -2479,7 +2480,13 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); void unpriv_ebpf_notify(int new_state); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) -int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); +int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr); +void bpf_dev_bound_netdev_unregister(struct net_device *dev); + +static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +{ + return aux->dev_bound; +} static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) { @@ -2507,12 +2514,21 @@ void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else -static inline int bpf_prog_offload_init(struct bpf_prog *prog, +static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev) +{ +} + +static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +{ + return false; +} + static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) { return false; -- cgit v1.2.3 From 3d76a4d3d4e591af3e789698affaad88a5a8e8ab Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 19 Jan 2023 14:15:26 -0800 Subject: bpf: XDP metadata RX kfuncs Define a new kfunc set (xdp_metadata_kfunc_ids) which implements all possible XDP metatada kfuncs. Not all devices have to implement them. If kfunc is not supported by the target device, the default implementation is called instead. The verifier, at load time, replaces a call to the generic kfunc with a call to the per-device one. Per-device kfunc pointers are stored in separate struct xdp_metadata_ops. Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Jakub Kicinski Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-8-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 17 ++++++++++++++++- include/linux/netdevice.h | 8 ++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b97a05bb47be..bb26c2e18092 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2480,6 +2480,9 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); void unpriv_ebpf_notify(int new_state); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, + struct bpf_prog_aux *prog_aux); +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id); int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr); void bpf_dev_bound_netdev_unregister(struct net_device *dev); @@ -2514,8 +2517,20 @@ void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else +static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, + struct bpf_prog_aux *prog_aux) +{ + return -EOPNOTSUPP; +} + +static inline void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, + u32 func_id) +{ + return NULL; +} + static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog, - union bpf_attr *attr) + union bpf_attr *attr) { return -EOPNOTSUPP; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aad12a179e54..90f2be194bc5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -74,6 +74,7 @@ struct udp_tunnel_nic_info; struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +struct xdp_md; void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, @@ -1618,6 +1619,11 @@ struct net_device_ops { bool cycles); }; +struct xdp_metadata_ops { + int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); + int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash); +}; + /** * enum netdev_priv_flags - &struct net_device priv_flags * @@ -1801,6 +1807,7 @@ enum netdev_ml_priv_type { * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions + * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour @@ -2050,6 +2057,7 @@ struct net_device { unsigned int flags; unsigned long long priv_flags; const struct net_device_ops *netdev_ops; + const struct xdp_metadata_ops *xdp_metadata_ops; int ifindex; unsigned short gflags; unsigned short hard_header_len; -- cgit v1.2.3 From fd7c211d6875013f81acc09868effe199b5d2c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 19 Jan 2023 14:15:27 -0800 Subject: bpf: Support consuming XDP HW metadata from fext programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of rejecting the attaching of PROG_TYPE_EXT programs to XDP programs that consume HW metadata, implement support for propagating the offload information. The extension program doesn't need to set a flag or ifindex, these will just be propagated from the target by the verifier. We need to create a separate offload object for the extension program, though, since it can be reattached to a different program later (which means we can't just inherit the offload information from the target). An additional check is added on attach that the new target is compatible with the offload information in the extension prog. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-9-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bb26c2e18092..ad4bb36d4c10 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2484,6 +2484,7 @@ int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, struct bpf_prog_aux *prog_aux); void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id); int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr); +int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog); void bpf_dev_bound_netdev_unregister(struct net_device *dev); static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) @@ -2496,6 +2497,8 @@ static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) return aux->offload_requested; } +bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs); + static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return unlikely(map->ops == &bpf_map_offload_ops); @@ -2535,6 +2538,12 @@ static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog, return -EOPNOTSUPP; } +static inline int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + return -EOPNOTSUPP; +} + static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev) { } @@ -2549,6 +2558,11 @@ static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) return false; } +static inline bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs) +{ + return false; +} + static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return false; -- cgit v1.2.3 From 8786b095df02c1b881643146869a7d6f80411e6a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 18 Jan 2023 22:55:12 +0100 Subject: i2c: gpio: support write-only sda/scl w/o pull-up There are slave devices that understand I2C but have read-only SDA and SCL. Examples are FD650 7-segment LED controller and its derivatives. Typical board designs don't even have a pull-up for both pins. Handle the new attributes for write-only SDA and missing pull-up on SDA/SCL. For either pin the open-drain and has-no-pullup properties are mutually-exclusive, what is documented in the DT property documentation. We don't add an extra warning here because the open-drain properties are marked deprecated anyway. Signed-off-by: Heiner Kallweit [wsa: switched to device properties] Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-gpio.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-gpio.h b/include/linux/platform_data/i2c-gpio.h index a907774fd177..545639bcca72 100644 --- a/include/linux/platform_data/i2c-gpio.h +++ b/include/linux/platform_data/i2c-gpio.h @@ -16,16 +16,25 @@ * isn't actively driven high when setting the output value high. * gpio_get_value() must return the actual pin state even if the * pin is configured as an output. + * @sda_is_output_only: SDA output drivers can't be turned off. + * This is for clients that can only read SDA/SCL. + * @sda_has_no_pullup: SDA is used in a non-compliant way and has no pull-up. + * Therefore disable open-drain. * @scl_is_open_drain: SCL is set up as open drain. Same requirements * as for sda_is_open_drain apply. * @scl_is_output_only: SCL output drivers cannot be turned off. + * @scl_has_no_pullup: SCL is used in a non-compliant way and has no pull-up. + * Therefore disable open-drain. */ struct i2c_gpio_platform_data { int udelay; int timeout; unsigned int sda_is_open_drain:1; + unsigned int sda_is_output_only:1; + unsigned int sda_has_no_pullup:1; unsigned int scl_is_open_drain:1; unsigned int scl_is_output_only:1; + unsigned int scl_has_no_pullup:1; }; #endif /* _LINUX_I2C_GPIO_H */ -- cgit v1.2.3 From 998101f2a78cf506022f2094461105cb3d00e19f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 21 Jan 2023 20:24:16 +0100 Subject: fbdev: Remove unused struct fb_deferred_io .first_io field This optional callback was added in the commit 1f45f9dbb392 ("fb_defio: add first_io callback") but it was never used by a driver. Let's remove it since it's unlikely that will be used after a decade that was added. Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230121192418.2814955-2-javierm@redhat.com --- include/linux/fb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 30183fd259ae..daf336385613 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -215,7 +215,6 @@ struct fb_deferred_io { struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ - void (*first_io)(struct fb_info *info); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif -- cgit v1.2.3 From cbad0fb2d8d97fa6dd8089c0cc729ced0abacad6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Jan 2023 13:45:56 +0000 Subject: ftrace: Add DYNAMIC_FTRACE_WITH_CALL_OPS Architectures without dynamic ftrace trampolines incur an overhead when multiple ftrace_ops are enabled with distinct filters. in these cases, each call site calls a common trampoline which uses ftrace_ops_list_func() to iterate over all enabled ftrace functions, and so incurs an overhead relative to the size of this list (including RCU protection overhead). Architectures with dynamic ftrace trampolines avoid this overhead for call sites which have a single associated ftrace_ops. In these cases, the dynamic trampoline is customized to branch directly to the relevant ftrace function, avoiding the list overhead. On some architectures it's impractical and/or undesirable to implement dynamic ftrace trampolines. For example, arm64 has limited branch ranges and cannot always directly branch from a call site to an arbitrary address (e.g. from a kernel text address to an arbitrary module address). Calls from modules to core kernel text can be indirected via PLTs (allocated at module load time) to address this, but the same is not possible from calls from core kernel text. Using an indirect branch from a call site to an arbitrary trampoline is possible, but requires several more instructions in the function prologue (or immediately before it), and/or comes with far more complex requirements for patching. Instead, this patch adds a new option, where an architecture can associate each call site with a pointer to an ftrace_ops, placed at a fixed offset from the call site. A shared trampoline can recover this pointer and call ftrace_ops::func() without needing to go via ftrace_ops_list_func(), avoiding the associated overhead. This avoids issues with branch range limitations, and avoids the need to allocate and manipulate dynamic trampolines, making it far simpler to implement and maintain, while having similar performance characteristics. Note that this allows for dynamic ftrace_ops to be invoked directly from an architecture's ftrace_caller trampoline, whereas existing code forces the use of ftrace_ops_get_list_func(), which is in part necessary to permit the ftrace_ops to be freed once unregistered *and* to avoid branch/address-generation range limitation on some architectures (e.g. where ops->func is a module address, and may be outside of the direct branch range for callsites within the main kernel image). The CALL_OPS approach avoids this problems and is safe as: * The existing synchronization in ftrace_shutdown() using ftrace_shutdown() using synchronize_rcu_tasks_rude() (and synchronize_rcu_tasks()) ensures that no tasks hold a stale reference to an ftrace_ops (e.g. in the middle of the ftrace_caller trampoline, or while invoking ftrace_ops::func), when that ftrace_ops is unregistered. Arguably this could also be relied upon for the existing scheme, permitting dynamic ftrace_ops to be invoked directly when ops->func is in range, but this will require additional logic to handle branch range limitations, and is not handled by this patch. * Each callsite's ftrace_ops pointer literal can hold any valid kernel address, and is updated atomically. As an architecture's ftrace_caller trampoline will atomically load the ops pointer then dereference ops->func, there is no risk of invoking ops->func with a mismatches ops pointer, and updates to the ops pointer do not require special care. A subsequent patch will implement architectures support for arm64. There should be no functional change as a result of this patch alone. Signed-off-by: Mark Rutland Reviewed-by: Steven Rostedt (Google) Cc: Florent Revest Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Will Deacon Link: https://lore.kernel.org/r/20230123134603.1064407-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/ftrace.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 99f1146614c0..366c730beaa3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -39,6 +39,7 @@ static inline void ftrace_boot_snapshot(void) { } struct ftrace_ops; struct ftrace_regs; +struct dyn_ftrace; #ifdef CONFIG_FUNCTION_TRACER /* @@ -57,6 +58,9 @@ void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #endif +extern const struct ftrace_ops ftrace_nop_ops; +extern const struct ftrace_ops ftrace_list_ops; +struct ftrace_ops *ftrace_find_unique_ops(struct dyn_ftrace *rec); #endif /* CONFIG_FUNCTION_TRACER */ /* Main tracing buffer and events set up */ @@ -391,8 +395,6 @@ struct ftrace_func_entry { unsigned long direct; /* for direct lookup only */ }; -struct dyn_ftrace; - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); @@ -563,6 +565,8 @@ bool is_ftrace_trampoline(unsigned long addr); * IPMODIFY - the record allows for the IP address to be changed. * DISABLED - the record is not ready to be touched yet * DIRECT - there is a direct function to call + * CALL_OPS - the record can use callsite-specific ops + * CALL_OPS_EN - the function is set up to use callsite-specific ops * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flags REGS is set. When the function has been @@ -580,9 +584,11 @@ enum { FTRACE_FL_DISABLED = (1UL << 25), FTRACE_FL_DIRECT = (1UL << 24), FTRACE_FL_DIRECT_EN = (1UL << 23), + FTRACE_FL_CALL_OPS = (1UL << 22), + FTRACE_FL_CALL_OPS_EN = (1UL << 21), }; -#define FTRACE_REF_MAX_SHIFT 23 +#define FTRACE_REF_MAX_SHIFT 21 #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) #define ftrace_rec_count(rec) ((rec)->flags & FTRACE_REF_MAX) @@ -820,7 +826,8 @@ static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS) || \ + defined(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) /** * ftrace_modify_call - convert from one addr to another (no nop) * @rec: the call site record (e.g. mcount/fentry) @@ -833,6 +840,9 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * When using call ops, this is called when the associated ops change, even + * when (addr == old_addr). + * * The code segment at @rec->ip should be a caller to @old_addr * * Return must be: -- cgit v1.2.3 From c27cd083cfb9d392f304657ed00fcde1136704e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Jan 2023 13:45:57 +0000 Subject: Compiler attributes: GCC cold function alignment workarounds Contemporary versions of GCC (e.g. GCC 12.2.0) drop the alignment specified by '-falign-functions=N' for functions marked with the __cold__ attribute, and potentially for callees of __cold__ functions as these may be implicitly marked as __cold__ by the compiler. LLVM appears to respect '-falign-functions=N' in such cases. This has been reported to GCC in bug 88345: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345 ... which also covers alignment being dropped when '-Os' is used, which will be addressed in a separate patch. Currently, use of '-falign-functions=N' is limited to CONFIG_FUNCTION_ALIGNMENT, which is largely used for performance and/or analysis reasons (e.g. with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B), but isn't necessary for correct functionality. However, this dropped alignment isn't great for the performance and/or analysis cases. Subsequent patches will use CONFIG_FUNCTION_ALIGNMENT as part of arm64's ftrace implementation, which will require all instrumented functions to be aligned to at least 8-bytes. This patch works around the dropped alignment by avoiding the use of the __cold__ attribute when CONFIG_FUNCTION_ALIGNMENT is non-zero, and by specifically aligning abort(), which GCC implicitly marks as __cold__. As the __cold macro is now dependent upon config options (which is against the policy described at the top of compiler_attributes.h), it is moved into compiler_types.h. I've tested this by building and booting a kernel configured with defconfig + CONFIG_EXPERT=y + CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y, and looking for misaligned text symbols in /proc/kallsyms: * arm64: Before: # uname -rm 6.2.0-rc3 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 5009 After: # uname -rm 6.2.0-rc3-00001-g2a2bedf8bfa9 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 919 * x86_64: Before: # uname -rm 6.2.0-rc3 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 11537 After: # uname -rm 6.2.0-rc3-00001-g2a2bedf8bfa9 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 2805 There's clearly a substantial reduction in the number of misaligned symbols. From manual inspection, the remaining unaligned text labels are a combination of ACPICA functions (due to the use of '-Os'), static call trampolines, and non-function labels in assembly, which will be dealt with in subsequent patches. Signed-off-by: Mark Rutland Cc: Florent Revest Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Will Deacon Cc: Miguel Ojeda Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20230123134603.1064407-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/compiler_attributes.h | 6 ------ include/linux/compiler_types.h | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a..b83126452c65 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -75,12 +75,6 @@ # define __assume_aligned(a, ...) #endif -/* - * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute - * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute - */ -#define __cold __attribute__((__cold__)) - /* * Note the long name. * diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 7c1afe0f4129..aab34e30128e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -79,6 +79,33 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Attributes */ #include +#if CONFIG_FUNCTION_ALIGNMENT > 0 +#define __function_aligned __aligned(CONFIG_FUNCTION_ALIGNMENT) +#else +#define __function_aligned +#endif + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute + * + * When -falign-functions=N is in use, we must avoid the cold attribute as + * contemporary versions of GCC drop the alignment for cold functions. Worse, + * GCC can implicitly mark callees of cold functions as cold themselves, so + * it's not sufficient to add __function_aligned here as that will not ensure + * that callees are correctly aligned. + * + * See: + * + * https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9 + */ +#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0) +#define __cold __attribute__((__cold__)) +#else +#define __cold +#endif + /* Builtins */ /* -- cgit v1.2.3 From 39f5a81f7ad80eb3fbcbfd817c6552db9de5504d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 24 Jan 2023 11:57:52 +0100 Subject: platform/x86: apple-gmux: Move port defines to apple-gmux.h This is a preparation patch for adding a new static inline apple_gmux_detect() helper which actually checks a supported gmux is present, rather then only checking an ACPI device with the HID is there as apple_gmux_present() does. Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection") Link: https://lore.kernel.org/platform-driver-x86/20230123113750.462144-1-hdegoede@redhat.com/ Reported-by: Emmanouil Kouroupakis Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230124105754.62167-2-hdegoede@redhat.com --- include/linux/apple-gmux.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h index ddb10aa67b14..80efaaf89e07 100644 --- a/include/linux/apple-gmux.h +++ b/include/linux/apple-gmux.h @@ -11,6 +11,29 @@ #define GMUX_ACPI_HID "APP000B" +/* + * gmux port offsets. Many of these are not yet used, but may be in the + * future, and it's useful to have them documented here anyhow. + */ +#define GMUX_PORT_VERSION_MAJOR 0x04 +#define GMUX_PORT_VERSION_MINOR 0x05 +#define GMUX_PORT_VERSION_RELEASE 0x06 +#define GMUX_PORT_SWITCH_DISPLAY 0x10 +#define GMUX_PORT_SWITCH_GET_DISPLAY 0x11 +#define GMUX_PORT_INTERRUPT_ENABLE 0x14 +#define GMUX_PORT_INTERRUPT_STATUS 0x16 +#define GMUX_PORT_SWITCH_DDC 0x28 +#define GMUX_PORT_SWITCH_EXTERNAL 0x40 +#define GMUX_PORT_SWITCH_GET_EXTERNAL 0x41 +#define GMUX_PORT_DISCRETE_POWER 0x50 +#define GMUX_PORT_MAX_BRIGHTNESS 0x70 +#define GMUX_PORT_BRIGHTNESS 0x74 +#define GMUX_PORT_VALUE 0xc2 +#define GMUX_PORT_READ 0xd0 +#define GMUX_PORT_WRITE 0xd4 + +#define GMUX_MIN_IO_LEN (GMUX_PORT_BRIGHTNESS + 4) + #if IS_ENABLED(CONFIG_APPLE_GMUX) /** -- cgit v1.2.3 From d143908f80f3e5d164ac3342f73d6b9f536e8b4d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 24 Jan 2023 11:57:53 +0100 Subject: platform/x86: apple-gmux: Add apple_gmux_detect() helper Add a new (static inline) apple_gmux_detect() helper to apple-gmux.h which can be used for gmux detection instead of apple_gmux_present(). The latter is not really reliable since an ACPI device with a HID of APP000B is present on some devices without a gmux at all, as well as on devices with a newer (unsupported) MMIO based gmux model. This causes apple_gmux_present() to return false-positives on a number of different Apple laptop models. This new helper uses the same probing as the actual apple-gmux driver, so that it does not return false positives. To avoid code duplication the gmux_probe() function of the actual driver is also moved over to using the new apple_gmux_detect() helper. This avoids false positives (vs _HID + IO region detection) on: MacBookPro5,4 https://pastebin.com/8Xjq7RhS MacBookPro8,1 https://linux-hardware.org/?probe=e513cfbadb&log=dmesg MacBookPro9,2 https://bugzilla.kernel.org/attachment.cgi?id=278961 MacBookPro10,2 https://lkml.org/lkml/2014/9/22/657 MacBookPro11,2 https://forums.fedora-fr.org/viewtopic.php?id=70142 MacBookPro11,4 https://raw.githubusercontent.com/im-0/investigate-card-reader-suspend-problem-on-mbp11.4/master/test-16/dmesg Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection") Link: https://lore.kernel.org/platform-driver-x86/20230123113750.462144-1-hdegoede@redhat.com/ Reported-by: Emmanouil Kouroupakis Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230124105754.62167-3-hdegoede@redhat.com --- include/linux/apple-gmux.h | 86 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h index 80efaaf89e07..1f68b49bcd68 100644 --- a/include/linux/apple-gmux.h +++ b/include/linux/apple-gmux.h @@ -8,6 +8,8 @@ #define LINUX_APPLE_GMUX_H #include +#include +#include #define GMUX_ACPI_HID "APP000B" @@ -35,14 +37,89 @@ #define GMUX_MIN_IO_LEN (GMUX_PORT_BRIGHTNESS + 4) #if IS_ENABLED(CONFIG_APPLE_GMUX) +static inline bool apple_gmux_is_indexed(unsigned long iostart) +{ + u16 val; + + outb(0xaa, iostart + 0xcc); + outb(0x55, iostart + 0xcd); + outb(0x00, iostart + 0xce); + + val = inb(iostart + 0xcc) | (inb(iostart + 0xcd) << 8); + if (val == 0x55aa) + return true; + + return false; +} /** - * apple_gmux_present() - detect if gmux is built into the machine + * apple_gmux_detect() - detect if gmux is built into the machine + * + * @pnp_dev: Device to probe or NULL to use the first matching device + * @indexed_ret: Returns (by reference) if the gmux is indexed or not + * + * Detect if a supported gmux device is present by actually probing it. + * This avoids the false positives returned on some models by + * apple_gmux_present(). + * + * Return: %true if a supported gmux ACPI device is detected and the kernel + * was configured with CONFIG_APPLE_GMUX, %false otherwise. + */ +static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret) +{ + u8 ver_major, ver_minor, ver_release; + struct device *dev = NULL; + struct acpi_device *adev; + struct resource *res; + bool indexed = false; + bool ret = false; + + if (!pnp_dev) { + adev = acpi_dev_get_first_match_dev(GMUX_ACPI_HID, NULL, -1); + if (!adev) + return false; + + dev = get_device(acpi_get_first_physical_node(adev)); + acpi_dev_put(adev); + if (!dev) + return false; + + pnp_dev = to_pnp_dev(dev); + } + + res = pnp_get_resource(pnp_dev, IORESOURCE_IO, 0); + if (!res || resource_size(res) < GMUX_MIN_IO_LEN) + goto out; + + /* + * Invalid version information may indicate either that the gmux + * device isn't present or that it's a new one that uses indexed io. + */ + ver_major = inb(res->start + GMUX_PORT_VERSION_MAJOR); + ver_minor = inb(res->start + GMUX_PORT_VERSION_MINOR); + ver_release = inb(res->start + GMUX_PORT_VERSION_RELEASE); + if (ver_major == 0xff && ver_minor == 0xff && ver_release == 0xff) { + indexed = apple_gmux_is_indexed(res->start); + if (!indexed) + goto out; + } + + if (indexed_ret) + *indexed_ret = indexed; + + ret = true; +out: + put_device(dev); + return ret; +} + +/** + * apple_gmux_present() - check if gmux ACPI device is present * * Drivers may use this to activate quirks specific to dual GPU MacBook Pros * and Mac Pros, e.g. for deferred probing, runtime pm and backlight. * - * Return: %true if gmux is present and the kernel was configured + * Return: %true if gmux ACPI device is present and the kernel was configured * with CONFIG_APPLE_GMUX, %false otherwise. */ static inline bool apple_gmux_present(void) @@ -57,6 +134,11 @@ static inline bool apple_gmux_present(void) return false; } +static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret) +{ + return false; +} + #endif /* !CONFIG_APPLE_GMUX */ #endif /* LINUX_APPLE_GMUX_H */ -- cgit v1.2.3 From b1cd16330c8c6bc73e769b7b63e46cec2e292d6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 17 Nov 2022 23:34:19 +0300 Subject: KVM: account allocation in generic version of kvm_arch_alloc_vm() Account the allocation of VMs in the generic version of kvm_arch_alloc_vm(), the VM is tied to the current task/process. Note, x86 already accounts its allocation. Signed-off-by: Alexey Dobriyan Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/Y3aay2u2KQgiR0un@p183 [sean: reworded changelog] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 109b18e2789c..c2bd15204c3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1461,7 +1461,7 @@ int kvm_arch_create_vm_debugfs(struct kvm *kvm); */ static inline struct kvm *kvm_arch_alloc_vm(void) { - return kzalloc(sizeof(struct kvm), GFP_KERNEL); + return kzalloc(sizeof(struct kvm), GFP_KERNEL_ACCOUNT); } #endif -- cgit v1.2.3 From b9926482ab91cd6dc19e28ae4f0ae98c6c1217d7 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Tue, 20 Sep 2022 14:02:10 +0800 Subject: kvm_host.h: fix spelling typo in function declaration Make parameters in function declaration consistent with those in function definition for better cscope-ability Signed-off-by: Wang Liang Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20220920060210.4842-1-wangliangzz@126.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c2bd15204c3f..a7d6a6111f5e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1353,7 +1353,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); -void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); -- cgit v1.2.3 From 7a0e39748861272e6f4b088d5a7e7ffa53c4d5eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Jan 2023 19:38:31 +0100 Subject: thermal: ACPI: Add ACPI trip point routines Add library routines to populate a generic thermal trip point structure with data obtained by evaluating a specific object in the ACPI Namespace. Signed-off-by: Rafael J. Wysocki Co-developed-by: Daniel Lezcano Signed-off-by: Daniel Lezcano Tested-by: Srinivas Pandruvada --- include/linux/thermal.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 30353e4b1424..23c2617156b5 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -346,6 +346,14 @@ int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); +#ifdef CONFIG_THERMAL_ACPI +int thermal_acpi_trip_active(struct acpi_device *adev, int id, + struct thermal_trip *trip); +int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip); +int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip); +int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip); +#endif + #ifdef CONFIG_THERMAL struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, -- cgit v1.2.3 From ec8f7d495b3d37c5f0d66cdc7e43ef76939293b6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Jan 2023 14:22:24 -0800 Subject: netlink: fix spelling mistake in dump size assert Commit 2c7bc10d0f7b ("netlink: add macro for checking dump ctx size") misspelled the name of the assert as asset, missing an R. Reported-by: Ido Schimmel Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20230123222224.732338-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 38f6334f408c..fa4d86da0ec7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -263,7 +263,7 @@ struct netlink_callback { }; }; -#define NL_ASSET_DUMP_CTX_FITS(type_name) \ +#define NL_ASSERT_DUMP_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) -- cgit v1.2.3 From 57539b1c0ac2dcccbe64a7675ff466be009c040f Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 20 Jan 2023 13:25:15 -0600 Subject: bpf: Enable annotating trusted nested pointers In kfuncs, a "trusted" pointer is a pointer that the kfunc can assume is safe, and which the verifier will allow to be passed to a KF_TRUSTED_ARGS kfunc. Currently, a KF_TRUSTED_ARGS kfunc disallows any pointer to be passed at a nonzero offset, but sometimes this is in fact safe if the "nested" pointer's lifetime is inherited from its parent. For example, the const cpumask_t *cpus_ptr field in a struct task_struct will remain valid until the task itself is destroyed, and thus would also be safe to pass to a KF_TRUSTED_ARGS kfunc. While it would be conceptually simple to enable this by using BTF tags, gcc unfortunately does not yet support this. In the interim, this patch enables support for this by using a type-naming convention. A new BTF_TYPE_SAFE_NESTED macro is defined in verifier.c which allows a developer to specify the nested fields of a type which are considered trusted if its parent is also trusted. The verifier is also updated to account for this. A patch with selftests will be added in a follow-on change, along with documentation for this feature. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230120192523.3650503-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ad4bb36d4c10..982213d97668 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2187,6 +2187,10 @@ struct bpf_core_ctx { const struct btf *btf; }; +bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off); + int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, int relo_idx, void *insn); -- cgit v1.2.3 From b613d335a743cf0e0ef0ccba9ad129904e2a26fb Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 20 Jan 2023 13:25:16 -0600 Subject: bpf: Allow trusted args to walk struct when checking BTF IDs When validating BTF types for KF_TRUSTED_ARGS kfuncs, the verifier currently enforces that the top-level type must match when calling the kfunc. In other words, the verifier does not allow the BPF program to pass a bitwise equivalent struct, despite it being allowed according to the C standard. For example, if you have the following type: struct nf_conn___init { struct nf_conn ct; }; The C standard stipulates that it would be safe to pass a struct nf_conn___init to a kfunc expecting a struct nf_conn. The verifier currently disallows this, however, as semantically kfuncs may want to enforce that structs that have equivalent types according to the C standard, but have different BTF IDs, are not able to be passed to kfuncs expecting one or the other. For example, struct nf_conn___init may not be queried / looked up, as it is allocated but may not yet be fully initialized. On the other hand, being able to pass types that are equivalent according to the C standard will be useful for other types of kfunc / kptrs enabled by BPF. For example, in a follow-on patch, a series of kfuncs will be added which allow programs to do bitwise queries on cpumasks that are either allocated by the program (in which case they'll be a 'struct bpf_cpumask' type that wraps a cpumask_t as its first element), or a cpumask that was allocated by the main kernel (in which case it will just be a straight cpumask_t, as in task->cpus_ptr). Having the two types of cpumasks allows us to distinguish between the two for when a cpumask is read-only vs. mutatable. A struct bpf_cpumask can be mutated by e.g. bpf_cpumask_clear(), whereas a regular cpumask_t cannot be. On the other hand, a struct bpf_cpumask can of course be queried in the exact same manner as a cpumask_t, with e.g. bpf_cpumask_test_cpu(). If we were to enforce that top level types match, then a user that's passing a struct bpf_cpumask to a read-only cpumask_t argument would have to cast with something like bpf_cast_to_kern_ctx() (which itself would need to be updated to expect the alias, and currently it only accommodates a single alias per prog type). Additionally, not specifying KF_TRUSTED_ARGS is not an option, as some kfuncs take one argument as a struct bpf_cpumask *, and another as a struct cpumask * (i.e. cpumask_t). In order to enable this, this patch relaxes the constraint that a KF_TRUSTED_ARGS kfunc must have strict type matching, and instead only enforces strict type matching if a type is observed to be a "no-cast alias" (i.e., that the type names are equivalent, but one is suffixed with ___init). Additionally, in order to try and be conservative and match existing behavior / expectations, this patch also enforces strict type checking for acquire kfuncs. We were already enforcing it for release kfuncs, so this should also improve the consistency of the semantics for kfuncs. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230120192523.3650503-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 982213d97668..1bec48d9e5d9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2191,6 +2191,10 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off); +bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, + const struct btf *reg_btf, u32 reg_id, + const struct btf *arg_btf, u32 arg_id); + int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, int relo_idx, void *insn); -- cgit v1.2.3 From ddce1e091757d0259107c6c0c7262df201de2b66 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sat, 21 Jan 2023 13:41:44 +0100 Subject: bpf, sockmap: Check for any of tcp_bpf_prots when cloning a listener A listening socket linked to a sockmap has its sk_prot overridden. It points to one of the struct proto variants in tcp_bpf_prots. The variant depends on the socket's family and which sockmap programs are attached. A child socket cloned from a TCP listener initially inherits their sk_prot. But before cloning is finished, we restore the child's proto to the listener's original non-tcp_bpf_prots one. This happens in tcp_create_openreq_child -> tcp_bpf_clone. Today, in tcp_bpf_clone we detect if the child's proto should be restored by checking only for the TCP_BPF_BASE proto variant. This is not correct. The sk_prot of listening socket linked to a sockmap can point to to any variant in tcp_bpf_prots. If the listeners sk_prot happens to be not the TCP_BPF_BASE variant, then the child socket unintentionally is left if the inherited sk_prot by tcp_bpf_clone. This leads to issues like infinite recursion on close [1], because the child state is otherwise not set up for use with tcp_bpf_prot operations. Adjust the check in tcp_bpf_clone to detect all of tcp_bpf_prots variants. Note that it wouldn't be sufficient to check the socket state when overriding the sk_prot in tcp_bpf_update_proto in order to always use the TCP_BPF_BASE variant for listening sockets. Since commit b8b8315e39ff ("bpf, sockmap: Remove unhash handler for BPF sockmap usage") it is possible for a socket to transition to TCP_LISTEN state while already linked to a sockmap, e.g. connect() -> insert into map -> connect(AF_UNSPEC) -> listen(). [1]: https://lore.kernel.org/all/00000000000073b14905ef2e7401@google.com/ Fixes: e80251555f0b ("tcp_bpf: Don't let child socket inherit parent protocol ops on copy") Reported-by: syzbot+04c21ed96d861dccc5cd@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230113-sockmap-fix-v2-2-1e0ee7ac2f90@cloudflare.com Signed-off-by: Alexei Starovoitov --- include/linux/util_macros.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 72299f261b25..43db6e47503c 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -38,4 +38,16 @@ */ #define find_closest_descending(x, a, as) __find_closest(x, a, as, >=) +/** + * is_insidevar - check if the @ptr points inside the @var memory range. + * @ptr: the pointer to a memory address. + * @var: the variable which address and size identify the memory range. + * + * Evaluates to true if the address in @ptr lies within the memory + * range allocated to @var. + */ +#define is_insidevar(ptr, var) \ + ((uintptr_t)(ptr) >= (uintptr_t)(var) && \ + (uintptr_t)(ptr) < (uintptr_t)(var) + sizeof(var)) + #endif -- cgit v1.2.3 From 1baedb13f1d50ae8c7852134fdf934b4463e9baa Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:47 +0100 Subject: s390/ism: Introduce struct ism_dmb Conceptually, a DMB is a structure that belongs to ISM devices. However, SMC currently 'owns' this structure. So future exploiters of ISM devices would be forced to include SMC headers to work - which is just weird. Therefore, we switch ISM to struct ism_dmb, introduce a new public header with the definition (will be populated with further API calls later on), and, add a thin wrapper to please SMC. Since structs smcd_dmb and ism_dmb are identical, we can simply convert between the two for now. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/linux/ism.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/ism.h (limited to 'include/linux') diff --git a/include/linux/ism.h b/include/linux/ism.h new file mode 100644 index 000000000000..69bfbf0faaa1 --- /dev/null +++ b/include/linux/ism.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Internal Shared Memory + * + * Definitions for the ISM module + * + * Copyright IBM Corp. 2022 + */ +#ifndef _ISM_H +#define _ISM_H + +struct ism_dmb { + u64 dmb_tok; + u64 rgid; + u32 dmb_len; + u32 sba_idx; + u32 vlan_valid; + u32 vlan_id; + void *cpu_addr; + dma_addr_t dma_addr; +}; + +#endif /* _ISM_H */ -- cgit v1.2.3 From 89e7d2ba61b742a7525ff06ea4d4378c4a5560d0 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:48 +0100 Subject: net/ism: Add new API for client registration Add a new API that allows other drivers to concurrently access ISM devices. To do so, we introduce a new API that allows other modules to register for ISM device usage. Furthermore, we move the GID to struct ism, where it belongs conceptually, and rename and relocate struct smcd_event to struct ism_event. This is the first part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/linux/ism.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ism.h b/include/linux/ism.h index 69bfbf0faaa1..55c8ad306928 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -9,6 +9,8 @@ #ifndef _ISM_H #define _ISM_H +#include + struct ism_dmb { u64 dmb_tok; u64 rgid; @@ -20,4 +22,69 @@ struct ism_dmb { dma_addr_t dma_addr; }; +/* Unless we gain unexpected popularity, this limit should hold for a while */ +#define MAX_CLIENTS 8 +#define ISM_NR_DMBS 1920 + +struct ism_dev { + spinlock_t lock; /* protects the ism device */ + struct list_head list; + struct pci_dev *pdev; + struct smcd_dev *smcd; + + struct ism_sba *sba; + dma_addr_t sba_dma_addr; + DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS); + u8 *sba_client_arr; /* entries are indices into 'clients' array */ + void *priv[MAX_CLIENTS]; + + struct ism_eq *ieq; + dma_addr_t ieq_dma_addr; + + struct device dev; + u64 local_gid; + int ieq_idx; + + atomic_t free_clients_cnt; + atomic_t add_dev_cnt; + wait_queue_head_t waitq; +}; + +struct ism_event { + u32 type; + u32 code; + u64 tok; + u64 time; + u64 info; +}; + +struct ism_client { + const char *name; + void (*add)(struct ism_dev *dev); + void (*remove)(struct ism_dev *dev); + void (*handle_event)(struct ism_dev *dev, struct ism_event *event); + /* Parameter dmbemask contains a bit vector with updated DMBEs, if sent + * via ism_move_data(). Callback function must handle all active bits + * indicated by dmbemask. + */ + void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask); + /* Private area - don't touch! */ + struct work_struct remove_work; + struct work_struct add_work; + struct ism_dev *tgt_ism; + u8 id; +}; + +int ism_register_client(struct ism_client *client); +int ism_unregister_client(struct ism_client *client); +static inline void *ism_get_priv(struct ism_dev *dev, + struct ism_client *client) { + return dev->priv[client->id]; +} + +static inline void ism_set_priv(struct ism_dev *dev, struct ism_client *client, + void *priv) { + dev->priv[client->id] = priv; +} + #endif /* _ISM_H */ -- cgit v1.2.3 From 9de4df7b6be1cfca500f8ba21137d53eec45418a Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:50 +0100 Subject: net/smc: Separate SMC-D and ISM APIs We separate the code implementing the struct smcd_ops API in the ISM device driver from the functions that may be used by other exploiters of ISM devices. Note: We start out small, and don't offer the whole breadth of the ISM device for public use, as many functions are specific to or likely only ever used in the context of SMC-D. This is the third part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/linux/ism.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ism.h b/include/linux/ism.h index 55c8ad306928..bdd29e08d4fe 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -87,4 +87,11 @@ static inline void ism_set_priv(struct ism_dev *dev, struct ism_client *client, dev->priv[client->id] = priv; } +int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb, + struct ism_client *client); +int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb); +int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, + unsigned int offset, void *data, unsigned int size); +u8 *ism_get_seid(void); + #endif /* _ISM_H */ -- cgit v1.2.3 From 820f21009f1bc7a69e28752f6c6d9544401ca526 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:51 +0100 Subject: s390/ism: Consolidate SMC-D-related code The ism module had SMC-D-specific code sprinkled across the entire module. We are now consolidating the SMC-D-specific parts into the latter parts of the module, so it becomes more clear what code is intended for use with ISM, and which parts are glue code for usage in the context of SMC-D. This is the fourth part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/linux/ism.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ism.h b/include/linux/ism.h index bdd29e08d4fe..104ce2fd503a 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -94,4 +94,6 @@ int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); u8 *ism_get_seid(void); +const struct smcd_ops *ism_get_smcd_ops(void); + #endif /* _ISM_H */ -- cgit v1.2.3 From 8c81ba20349daf9f7e58bb05a0c12f4b71813a30 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:52 +0100 Subject: net/smc: De-tangle ism and smc device initialization The struct device for ISM devices was part of struct smcd_dev. Move to struct ism_dev, provide a new API call in struct smcd_ops, and convert existing SMCD code accordingly. Furthermore, remove struct smcd_dev from struct ism_dev. This is the final part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/linux/ism.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ism.h b/include/linux/ism.h index 104ce2fd503a..ea2bcdae7401 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -30,7 +30,6 @@ struct ism_dev { spinlock_t lock; /* protects the ism device */ struct list_head list; struct pci_dev *pdev; - struct smcd_dev *smcd; struct ism_sba *sba; dma_addr_t sba_dma_addr; -- cgit v1.2.3 From e251c21372c07694f547afe3c9828f7f6ef01267 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:42:48 +0100 Subject: of: Introduce of_translate_dma_region() This function is similar to of_translate_dma_address() but also reads a length in addition to an address from a device tree property. Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20230120174251.4004100-2-thierry.reding@gmail.com Signed-off-by: Joerg Roedel --- include/linux/of_address.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 265f26eeaf6b..376671594746 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -38,6 +38,8 @@ struct of_pci_range { /* Translate a DMA address from device space to CPU space */ extern u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr); +extern const __be32 *of_translate_dma_region(struct device_node *dev, const __be32 *addr, + phys_addr_t *start, size_t *length); #ifdef CONFIG_OF_ADDRESS extern u64 of_translate_address(struct device_node *np, const __be32 *addr); -- cgit v1.2.3 From a5bf3cfce8cb77d9d24613ab52d520896f83dd48 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:42:50 +0100 Subject: iommu: Implement of_iommu_get_resv_regions() This is an implementation that IOMMU drivers can use to obtain reserved memory regions from a device tree node. It uses the reserved-memory DT bindings to find the regions associated with a given device. If these regions are marked accordingly, identity mappings will be created for them in the IOMMU domain that the devices will be attached to. Cc: Frank Rowand Cc: devicetree@vger.kernel.org Reviewed-by: Rob Herring Acked-by: Robin Murphy Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20230120174251.4004100-4-thierry.reding@gmail.com Signed-off-by: Joerg Roedel --- include/linux/of_iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 55c1eb300a86..9a5e6b410dd2 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); +extern void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list); + #else static inline const struct iommu_ops *of_iommu_configure(struct device *dev, @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, return NULL; } +static inline void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ +} + #endif /* CONFIG_OF_IOMMU */ #endif /* __OF_IOMMU_H */ -- cgit v1.2.3 From 1369459b2e219a6f4c861404c4f195cd81dcbb40 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:54 -0400 Subject: iommu: Add a gfp parameter to iommu_map() The internal mechanisms support this, but instead of exposting the gfp to the caller it wrappers it into iommu_map() and iommu_map_atomic() Fix this instead of adding more variants for GFP_KERNEL_ACCOUNT. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/1-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..d2020994f292 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -467,7 +467,7 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -773,7 +773,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { return -ENODEV; } -- cgit v1.2.3 From 4dc6376af596d9b2a46fa9baf94c9f2fa5a3d246 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:55 -0400 Subject: iommu: Remove iommu_map_atomic() There is only one call site and it can now just pass the GFP_ATOMIC to the normal iommu_map(). Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d2020994f292..521cd79700f4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -468,8 +468,6 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); -extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, @@ -778,13 +776,6 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } -static inline int iommu_map_atomic(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - return -ENODEV; -} - static inline size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { -- cgit v1.2.3 From f2b2c051be6262edc3c6fce50d3d4f01b59ba228 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:56 -0400 Subject: iommu: Add a gfp parameter to iommu_map_sg() Follow the pattern for iommu_map() and remove iommu_map_sg_atomic(). This allows __iommu_dma_alloc_noncontiguous() to use a GFP_KERNEL allocation here, based on the provided gfp flags. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 521cd79700f4..d5c16dc33c87 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -474,10 +474,8 @@ extern size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot); -extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot); + struct scatterlist *sg, unsigned int nents, + int prot, gfp_t gfp); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -791,14 +789,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain, static inline ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) -{ - return -ENODEV; -} - -static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) + unsigned int nents, int prot, gfp_t gfp) { return -ENODEV; } @@ -1109,7 +1100,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, static inline size_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { - return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot); + return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, + GFP_KERNEL); } #ifdef CONFIG_IOMMU_DEBUGFS -- cgit v1.2.3 From 9cad72dfc5567c66ab0e5a0a2474c5f36c268694 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 21 Jan 2023 14:58:46 +0000 Subject: usb: gadget: Add support for RZ/V2M USB3DRD driver The RZ/V2M USB3.1 Gen1 Interface (USB) composed of a USB3.1 Gen1 Dual Role Device controller (USB3DRD), a USB3.1 Gen1 Host controller (USB3HOST), a USB3.1 Gen1 Peripheral controller (USB3PERI). The reset for both host and peri are located in USB3DRD block. The USB3DRD registers are mapped in the AXI address space of the Peripheral module. Add USB3DRD driver to handle reset for both host and peri modules. Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230121145853.4792-6-biju.das.jz@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/rzv2m_usb3drd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/usb/rzv2m_usb3drd.h (limited to 'include/linux') diff --git a/include/linux/usb/rzv2m_usb3drd.h b/include/linux/usb/rzv2m_usb3drd.h new file mode 100644 index 000000000000..4cc848de229f --- /dev/null +++ b/include/linux/usb/rzv2m_usb3drd.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __RZV2M_USB3DRD_H +#define __RZV2M_USB3DRD_H + +#include + +struct rzv2m_usb3drd { + void __iomem *reg; + int drd_irq; + struct device *dev; + struct reset_control *drd_rstc; +}; + +#if IS_ENABLED(CONFIG_USB_RZV2M_USB3DRD) +void rzv2m_usb3drd_reset(struct device *dev, bool host); +#else +static inline void rzv2m_usb3drd_reset(struct device *dev, bool host) { } +#endif + +#endif /* __RZV2M_USB3DRD_H */ -- cgit v1.2.3 From be234d00240cbb53d5a1fa0e58b2d14ff121dca2 Mon Sep 17 00:00:00 2001 From: "Nancy.Lin" Date: Fri, 13 Jan 2023 18:44:26 +0800 Subject: soc: mediatek: add mtk-mmsys ethdr and mdp_rdma components Add new mmsys component: ethdr_mixer and mdp_rdma. These components will use in mt8195 vdosys1. Signed-off-by: Nancy.Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Tested-by: AngeloGioacchino Del Regno Tested-by: Bo-Chen Chen Link: https://lore.kernel.org/r/20230113104434.28023-4-nancy.lin@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index b85f66db33e1..3e4b080627c3 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -36,7 +36,16 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_DSI1, DDP_COMPONENT_DSI2, DDP_COMPONENT_DSI3, + DDP_COMPONENT_ETHDR_MIXER, DDP_COMPONENT_GAMMA, + DDP_COMPONENT_MDP_RDMA0, + DDP_COMPONENT_MDP_RDMA1, + DDP_COMPONENT_MDP_RDMA2, + DDP_COMPONENT_MDP_RDMA3, + DDP_COMPONENT_MDP_RDMA4, + DDP_COMPONENT_MDP_RDMA5, + DDP_COMPONENT_MDP_RDMA6, + DDP_COMPONENT_MDP_RDMA7, DDP_COMPONENT_MERGE0, DDP_COMPONENT_MERGE1, DDP_COMPONENT_MERGE2, -- cgit v1.2.3 From 3dd20b715c4483ae5d8ddb22cbc8e116944094e4 Mon Sep 17 00:00:00 2001 From: "Nancy.Lin" Date: Fri, 13 Jan 2023 18:44:29 +0800 Subject: soc: mediatek: add mtk-mmsys config API for mt8195 vdosys1 Add four mmsys config APIs. The config APIs are used for config mmsys reg. Some mmsys regs need to be set according to the HW engine binding to the mmsys simultaneously. 1. mtk_mmsys_merge_async_config: config merge async width/height. async is used for cross-clock domain synchronization. 2. mtk_mmsys_hdr_confing: config hdr backend async width/height. 3. mtk_mmsys_mixer_in_config and mtk_mmsys_mixer_in_config: config mixer related settings. Signed-off-by: Nancy.Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Tested-by: AngeloGioacchino Del Regno Tested-by: Bo-Chen Chen Link: https://lore.kernel.org/r/20230113104434.28023-7-nancy.lin@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 3e4b080627c3..5a6753aed482 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -83,4 +83,13 @@ void mtk_mmsys_ddp_disconnect(struct device *dev, void mtk_mmsys_ddp_dpi_fmt_config(struct device *dev, u32 val); +void mtk_mmsys_merge_async_config(struct device *dev, int idx, int width, int height); + +void mtk_mmsys_hdr_config(struct device *dev, int be_width, int be_height); + +void mtk_mmsys_mixer_in_config(struct device *dev, int idx, bool alpha_sel, u16 alpha, + u8 mode, u32 biwidth); + +void mtk_mmsys_mixer_in_channel_swap(struct device *dev, int idx, bool channel_swap); + #endif /* __MTK_MMSYS_H */ -- cgit v1.2.3 From 8af1f6b5bccb61edc40c3c00cefa7be07c1e5a91 Mon Sep 17 00:00:00 2001 From: "Nancy.Lin" Date: Fri, 13 Jan 2023 18:44:30 +0800 Subject: soc: mediatek: add cmdq support of mtk-mmsys config API for mt8195 vdosys1 Add cmdq support for mtk-mmsys config API. The mmsys config register settings need to take effect with the other HW settings(like OVL_ADAPTOR...) at the same vblanking time. If we use CPU to write the mmsys reg, we can't guarantee all the settings can be written in the same vblanking time. Cmdq is used for this purpose. We prepare all the related HW settings in one cmdq packet. The first command in the packet is "wait stream done", and then following with all the HW settings. After the cmdq packet is flush to GCE HW. The GCE waits for the "stream done event" to coming and then starts flushing all the HW settings. This can guarantee all the settings flush in the same vblanking. Signed-off-by: Nancy.Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Tested-by: AngeloGioacchino Del Regno Tested-by: Bo-Chen Chen Link: https://lore.kernel.org/r/20230113104434.28023-8-nancy.lin@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 5a6753aed482..dc2963a0a0f7 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -6,6 +6,10 @@ #ifndef __MTK_MMSYS_H #define __MTK_MMSYS_H +#include +#include +#include + enum mtk_ddp_comp_id; struct device; @@ -83,13 +87,16 @@ void mtk_mmsys_ddp_disconnect(struct device *dev, void mtk_mmsys_ddp_dpi_fmt_config(struct device *dev, u32 val); -void mtk_mmsys_merge_async_config(struct device *dev, int idx, int width, int height); +void mtk_mmsys_merge_async_config(struct device *dev, int idx, int width, + int height, struct cmdq_pkt *cmdq_pkt); -void mtk_mmsys_hdr_config(struct device *dev, int be_width, int be_height); +void mtk_mmsys_hdr_config(struct device *dev, int be_width, int be_height, + struct cmdq_pkt *cmdq_pkt); void mtk_mmsys_mixer_in_config(struct device *dev, int idx, bool alpha_sel, u16 alpha, - u8 mode, u32 biwidth); + u8 mode, u32 biwidth, struct cmdq_pkt *cmdq_pkt); -void mtk_mmsys_mixer_in_channel_swap(struct device *dev, int idx, bool channel_swap); +void mtk_mmsys_mixer_in_channel_swap(struct device *dev, int idx, bool channel_swap, + struct cmdq_pkt *cmdq_pkt); #endif /* __MTK_MMSYS_H */ -- cgit v1.2.3 From 3a6a9b3be290299309299a9fd1345521668a2896 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 16 Dec 2022 12:06:55 -0800 Subject: virtchnl: remove unused structure declaration Nothing uses virtchnl_msg, just remove it. Signed-off-by: Jesse Brandeburg Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index d91af50ac58d..b1cfa84904b1 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -162,19 +162,6 @@ enum virtchnl_ops { #define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \ { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) } -/* Virtual channel message descriptor. This overlays the admin queue - * descriptor. All other data is passed in external buffers. - */ - -struct virtchnl_msg { - u8 pad[8]; /* AQ flags/opcode/len/retval fields */ - enum virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */ - enum virtchnl_status_code v_retval; /* ditto for desc->retval */ - u32 vfid; /* used by PF when sending to VF */ -}; - -VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_msg); - /* Message descriptions and data structures. */ /* VIRTCHNL_OP_VERSION -- cgit v1.2.3 From 43fc70a208ce848f2ec93dadf160ebcc77ec71e8 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 16 Dec 2022 12:06:56 -0800 Subject: virtchnl: update header and increase header clarity We already have the SPDX header, so just leave a copyright notice with an updated year and get rid of the boilerplate header (so 2002!). In addition, update a couple of comments to clarify how the various parts of the virtchannel header interaction work. No functional changes. Signed-off-by: Jesse Brandeburg Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index b1cfa84904b1..2e685aa37688 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -1,21 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * Contact Information: - * e1000-devel Mailing List - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright (c) 2013-2022, Intel Corporation. */ #ifndef _VIRTCHNL_H_ #define _VIRTCHNL_H_ /* Description: - * This header file describes the VF-PF communication protocol used - * by the drivers for all devices starting from our 40G product line + * This header file describes the Virtual Function (VF) - Physical Function + * (PF) communication protocol used by the drivers for all devices starting + * from our 40G product line * * Admin queue buffer usage: * desc->opcode is always aqc_opc_send_msg_to_pf @@ -29,8 +21,8 @@ * have a maximum of sixteen queues for all of its VSIs. * * The PF is required to return a status code in v_retval for all messages - * except RESET_VF, which does not require any response. The return value - * is of status_code type, defined in the shared type.h. + * except RESET_VF, which does not require any response. The returned value + * is of virtchnl_status_code type, defined here. * * In general, VF driver initialization should roughly follow the order of * these opcodes. The VF driver must first validate the API version of the @@ -323,6 +315,9 @@ VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_rxq_info); * PF configures queues and returns status. * If the number of queues specified is greater than the number of queues * associated with the VSI, an error is returned and no queues are configured. + * NOTE: The VF is not required to configure all queues in a single request. + * It may send multiple messages. PF drivers must correctly handle all VF + * requests. */ struct virtchnl_queue_pair_info { /* NOTE: vsi_id and queue_id should be identical for both queues. */ @@ -360,8 +355,13 @@ struct virtchnl_vf_res_request { * VF uses this message to map vectors to queues. * The rxq_map and txq_map fields are bitmaps used to indicate which queues * are to be associated with the specified vector. - * The "other" causes are always mapped to vector 0. + * The "other" causes are always mapped to vector 0. The VF may not request + * that vector 0 be used for traffic. * PF configures interrupt mapping and returns status. + * NOTE: due to hardware requirements, all active queues (both TX and RX) + * should be mapped to interrupts, even if the driver intends to operate + * only in polling mode. In this case the interrupt may be disabled, but + * the ITR timer will still run to trigger writebacks. */ struct virtchnl_vector_map { u16 vsi_id; @@ -388,6 +388,9 @@ VIRTCHNL_CHECK_STRUCT_LEN(14, virtchnl_irq_map_info); * (Currently, we only support 16 queues per VF, but we make the field * u32 to allow for expansion.) * PF performs requested action and returns status. + * NOTE: The VF is not required to enable/disable all queues in a single + * request. It may send multiple messages. + * PF drivers must correctly handle all VF requests. */ struct virtchnl_queue_select { u16 vsi_id; -- cgit v1.2.3 From 4e4df55941f0784e08e0ab50b04a08c7e1c949eb Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 16 Dec 2022 12:06:57 -0800 Subject: virtchnl: do structure hardening The virtchnl interface can have a bunch of "soft" defined structures hardened by using explicit sizes for declarations, and then referring to the enum type that uses them in a comment. None of these changes should change any of the structure sizes. Also, remove a duplicate line in a switch statement and let two cases uses the same code. Signed-off-by: Jesse Brandeburg Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 48 +++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2e685aa37688..5c630c818370 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -214,7 +214,9 @@ enum virtchnl_vsi_type { struct virtchnl_vsi_resource { u16 vsi_id; u16 num_queue_pairs; - enum virtchnl_vsi_type vsi_type; + + /* see enum virtchnl_vsi_type */ + s32 vsi_type; u16 qset_handle; u8 default_mac_addr[ETH_ALEN]; }; @@ -303,7 +305,9 @@ struct virtchnl_rxq_info { u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; - enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */ + + /* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */ + s32 rx_split_pos; u32 pad2; }; @@ -955,8 +959,12 @@ enum virtchnl_flow_type { struct virtchnl_filter { union virtchnl_flow_spec data; union virtchnl_flow_spec mask; - enum virtchnl_flow_type flow_type; - enum virtchnl_action action; + + /* see enum virtchnl_flow_type */ + s32 flow_type; + + /* see enum virtchnl_action */ + s32 action; u32 action_meta; u8 field_flags; u8 pad[3]; @@ -984,7 +992,8 @@ enum virtchnl_event_codes { #define PF_EVENT_SEVERITY_CERTAIN_DOOM 255 struct virtchnl_pf_event { - enum virtchnl_event_codes event; + /* see enum virtchnl_event_codes */ + s32 event; union { /* If the PF driver does not support the new speed reporting * capabilities then use link_event else use link_event_adv to @@ -997,6 +1006,7 @@ struct virtchnl_pf_event { struct { enum virtchnl_link_speed link_speed; bool link_status; + u8 pad[3]; } link_event; struct { /* link_speed provided in Mbps */ @@ -1006,7 +1016,7 @@ struct virtchnl_pf_event { } link_event_adv; } event_data; - int severity; + s32 severity; }; VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event); @@ -1097,7 +1107,7 @@ enum virtchnl_rss_algorithm { #define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \ (((hdr)->type) >> PROTO_HDR_SHIFT) #define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \ - ((hdr)->type == ((val) >> PROTO_HDR_SHIFT)) + ((hdr)->type == ((s32)((val) >> PROTO_HDR_SHIFT))) #define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \ (VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \ VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val))) @@ -1193,7 +1203,8 @@ enum virtchnl_proto_hdr_field { }; struct virtchnl_proto_hdr { - enum virtchnl_proto_hdr_type type; + /* see enum virtchnl_proto_hdr_type */ + s32 type; u32 field_selector; /* a bit mask to select field for header type */ u8 buffer[64]; /** @@ -1223,15 +1234,18 @@ VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); struct virtchnl_rss_cfg { struct virtchnl_proto_hdrs proto_hdrs; /* protocol headers */ - enum virtchnl_rss_algorithm rss_algorithm; /* RSS algorithm type */ - u8 reserved[128]; /* reserve for future */ + + /* see enum virtchnl_rss_algorithm; rss algorithm type */ + s32 rss_algorithm; + u8 reserved[128]; /* reserve for future */ }; VIRTCHNL_CHECK_STRUCT_LEN(2444, virtchnl_rss_cfg); /* action configuration for FDIR */ struct virtchnl_filter_action { - enum virtchnl_action type; + /* see enum virtchnl_action type */ + s32 type; union { /* used for queue and qgroup action */ struct { @@ -1319,7 +1333,9 @@ struct virtchnl_fdir_add { u16 validate_only; /* INPUT */ u32 flow_id; /* OUTPUT */ struct virtchnl_fdir_rule rule_cfg; /* INPUT */ - enum virtchnl_fdir_prgm_status status; /* OUTPUT */ + + /* see enum virtchnl_fdir_prgm_status; OUTPUT */ + s32 status; }; VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add); @@ -1332,7 +1348,9 @@ struct virtchnl_fdir_del { u16 vsi_id; /* INPUT */ u16 pad; u32 flow_id; /* INPUT */ - enum virtchnl_fdir_prgm_status status; /* OUTPUT */ + + /* see enum virtchnl_fdir_prgm_status; OUTPUT */ + s32 status; }; VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); @@ -1351,7 +1369,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, u8 *msg, u16 msglen) { bool err_msg_format = false; - int valid_len = 0; + u32 valid_len = 0; /* Validate message length. */ switch (v_opcode) { @@ -1492,8 +1510,6 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DISABLE_CHANNELS: break; case VIRTCHNL_OP_ADD_CLOUD_FILTER: - valid_len = sizeof(struct virtchnl_filter); - break; case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; -- cgit v1.2.3 From 2723f3b5d4ff5853503aae368c6b165b57fac651 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 16 Dec 2022 12:06:58 -0800 Subject: virtchnl: i40e/iavf: rename iwarp to rdma Since the latest Intel hardware does both IWARP and ROCE, rename the term IWARP in the virtchnl header to be RDMA. Do this for both upper and lower case instances. Many of the non-virtchnl.h changes were done with regular expression replacements using perl like: perl -p -i -e 's/_IWARP/_RDMA/' perl -p -i -e 's/_iwarp/_rdma/' and I had to pick up a few instances manually. The virtchnl.h header has some comments and clarity added around when to use certain defines. note: had to fix a checkpatch warning for a long line by wrapping one of the lines I changed. Signed-off-by: Jesse Brandeburg Tested-by: Jakub Andrysiak Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 65 +++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 5c630c818370..c15221dcb75e 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -114,9 +114,13 @@ enum virtchnl_ops { VIRTCHNL_OP_GET_STATS = 15, VIRTCHNL_OP_RSVD = 16, VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + /* opcode 19 is reserved */ VIRTCHNL_OP_IWARP = 20, /* advanced opcode */ + VIRTCHNL_OP_RDMA = VIRTCHNL_OP_IWARP, VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, /* advanced opcode */ + VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP = VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, /* advanced opcode */ + VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP = VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, VIRTCHNL_OP_CONFIG_RSS_KEY = 23, VIRTCHNL_OP_CONFIG_RSS_LUT = 24, VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, @@ -228,7 +232,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); * TX/RX Checksum offloading and TSO for non-tunnelled packets. */ #define VIRTCHNL_VF_OFFLOAD_L2 BIT(0) -#define VIRTCHNL_VF_OFFLOAD_IWARP BIT(1) +#define VIRTCHNL_VF_OFFLOAD_RDMA BIT(1) +#define VIRTCHNL_VF_CAP_RDMA VIRTCHNL_VF_OFFLOAD_RDMA #define VIRTCHNL_VF_OFFLOAD_RSS_AQ BIT(3) #define VIRTCHNL_VF_OFFLOAD_RSS_REG BIT(4) #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR BIT(5) @@ -1021,34 +1026,36 @@ struct virtchnl_pf_event { VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event); -/* VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP - * VF uses this message to request PF to map IWARP vectors to IWARP queues. - * The request for this originates from the VF IWARP driver through - * a client interface between VF LAN and VF IWARP driver. +/* used to specify if a ceq_idx or aeq_idx is invalid */ +#define VIRTCHNL_RDMA_INVALID_QUEUE_IDX 0xFFFF +/* VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP + * VF uses this message to request PF to map RDMA vectors to RDMA queues. + * The request for this originates from the VF RDMA driver through + * a client interface between VF LAN and VF RDMA driver. * A vector could have an AEQ and CEQ attached to it although - * there is a single AEQ per VF IWARP instance in which case - * most vectors will have an INVALID_IDX for aeq and valid idx for ceq. - * There will never be a case where there will be multiple CEQs attached - * to a single vector. + * there is a single AEQ per VF RDMA instance in which case + * most vectors will have an VIRTCHNL_RDMA_INVALID_QUEUE_IDX for aeq and valid + * idx for ceqs There will never be a case where there will be multiple CEQs + * attached to a single vector. * PF configures interrupt mapping and returns status. */ -struct virtchnl_iwarp_qv_info { +struct virtchnl_rdma_qv_info { u32 v_idx; /* msix_vector */ - u16 ceq_idx; - u16 aeq_idx; + u16 ceq_idx; /* set to VIRTCHNL_RDMA_INVALID_QUEUE_IDX if invalid */ + u16 aeq_idx; /* set to VIRTCHNL_RDMA_INVALID_QUEUE_IDX if invalid */ u8 itr_idx; u8 pad[3]; }; -VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info); +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_rdma_qv_info); -struct virtchnl_iwarp_qvlist_info { +struct virtchnl_rdma_qvlist_info { u32 num_vectors; - struct virtchnl_iwarp_qv_info qv_info[1]; + struct virtchnl_rdma_qv_info qv_info[1]; }; -VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info); +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_rdma_qvlist_info); /* VF reset states - these are written into the RSTAT register: * VFGEN_RSTAT on the VF @@ -1287,7 +1294,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule); /* Status returned to VF after VF requests FDIR commands * VIRTCHNL_FDIR_SUCCESS * VF FDIR related request is successfully done by PF - * The request can be OP_ADD/DEL. + * The request can be OP_ADD/DEL/QUERY_FDIR_FILTER. * * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource. @@ -1308,6 +1315,10 @@ VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule); * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out * for programming. + * + * VIRTCHNL_FDIR_FAILURE_QUERY_INVALID + * OP_QUERY_FDIR_FILTER request is failed due to parameters validation, + * for example, VF query counter of a rule who has no counter action. */ enum virtchnl_fdir_prgm_status { VIRTCHNL_FDIR_SUCCESS = 0, @@ -1317,6 +1328,7 @@ enum virtchnl_fdir_prgm_status { VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST, VIRTCHNL_FDIR_FAILURE_RULE_INVALID, VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT, + VIRTCHNL_FDIR_FAILURE_QUERY_INVALID, }; /* VIRTCHNL_OP_ADD_FDIR_FILTER @@ -1444,7 +1456,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_GET_STATS: valid_len = sizeof(struct virtchnl_queue_select); break; - case VIRTCHNL_OP_IWARP: + case VIRTCHNL_OP_RDMA: /* These messages are opaque to us and will be validated in * the RDMA client code. We just need to check for nonzero * length. The firmware will enforce max length restrictions. @@ -1454,19 +1466,16 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, else err_msg_format = true; break; - case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP: + case VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP: break; - case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: - valid_len = sizeof(struct virtchnl_iwarp_qvlist_info); + case VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP: + valid_len = sizeof(struct virtchnl_rdma_qvlist_info); if (msglen >= valid_len) { - struct virtchnl_iwarp_qvlist_info *qv = - (struct virtchnl_iwarp_qvlist_info *)msg; - if (qv->num_vectors == 0) { - err_msg_format = true; - break; - } + struct virtchnl_rdma_qvlist_info *qv = + (struct virtchnl_rdma_qvlist_info *)msg; + valid_len += ((qv->num_vectors - 1) * - sizeof(struct virtchnl_iwarp_qv_info)); + sizeof(struct virtchnl_rdma_qv_info)); } break; case VIRTCHNL_OP_CONFIG_RSS_KEY: -- cgit v1.2.3 From 51a52a29ebaa8395de090fa415c6e1b2899a50f1 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 25 Jan 2023 10:47:34 -0600 Subject: bpf: Pass const struct bpf_prog * to .check_member The .check_member field of struct bpf_struct_ops is currently passed the member's btf_type via const struct btf_type *t, and a const struct btf_member *member. This allows the struct_ops implementation to check whether e.g. an ops is supported, but it would be useful to also enforce that the struct_ops prog being loaded for that member has other qualities, like being sleepable (or not). This patch therefore updates the .check_member() callback to also take a const struct bpf_prog *prog argument. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230125164735.785732-4-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bec48d9e5d9..0d868ef1b973 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1422,7 +1422,8 @@ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; int (*init)(struct btf *btf); int (*check_member)(const struct btf_type *t, - const struct btf_member *member); + const struct btf_member *member, + const struct bpf_prog *prog); int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); -- cgit v1.2.3 From 7dd880592a88799f3ef48fda507849a75f11cbf0 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 25 Jan 2023 10:47:35 -0600 Subject: bpf/selftests: Verify struct_ops prog sleepable behavior In a set of prior changes, we added the ability for struct_ops programs to be sleepable. This patch enhances the dummy_st_ops selftest suite to validate this behavior by adding a new sleepable struct_ops entry to dummy_st_ops. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230125164735.785732-5-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0d868ef1b973..14a0264fac57 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1474,6 +1474,7 @@ struct bpf_dummy_ops { int (*test_1)(struct bpf_dummy_ops_state *cb); int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, char a3, unsigned long a4); + int (*test_sleepable)(struct bpf_dummy_ops_state *cb); }; int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, -- cgit v1.2.3 From c5bc1b3ff35ae321d018d0c4ba66b062e4fb1e05 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Sep 2022 09:37:51 -0400 Subject: fs: uninline inode_query_iversion Reviewed-by: NeilBrown Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton --- include/linux/iversion.h | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iversion.h b/include/linux/iversion.h index e27bd4f55d84..6755d8b4f20b 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -234,42 +234,6 @@ inode_peek_iversion(const struct inode *inode) return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } -/** - * inode_query_iversion - read i_version for later use - * @inode: inode from which i_version should be read - * - * Read the inode i_version counter. This should be used by callers that wish - * to store the returned i_version for later comparison. This will guarantee - * that a later query of the i_version will result in a different value if - * anything has changed. - * - * In this implementation, we fetch the current value, set the QUERIED flag and - * then try to swap it into place with a cmpxchg, if it wasn't already set. If - * that fails, we try again with the newly fetched value from the cmpxchg. - */ -static inline u64 -inode_query_iversion(struct inode *inode) -{ - u64 cur, new; - - cur = inode_peek_iversion_raw(inode); - do { - /* If flag is already set, then no need to swap */ - if (cur & I_VERSION_QUERIED) { - /* - * This barrier (and the implicit barrier in the - * cmpxchg below) pairs with the barrier in - * inode_maybe_inc_iversion(). - */ - smp_mb(); - break; - } - - new = cur | I_VERSION_QUERIED; - } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); - return cur >> I_VERSION_QUERIED_SHIFT; -} - /* * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: @@ -283,6 +247,8 @@ static inline u64 time_to_chattr(struct timespec64 *t) return chattr; } +u64 inode_query_iversion(struct inode *inode); + /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check -- cgit v1.2.3 From a3bb710383cbca2a0f1f4e5a1c7ef8dde14eff95 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 22 Aug 2022 09:04:04 -0400 Subject: fs: clarify when the i_version counter must be updated The i_version field in the kernel has had different semantics over the decades, but NFSv4 has certain expectations. Update the comments in iversion.h to describe when the i_version must change. Cc: Colin Walters Cc: NeilBrown Cc: Trond Myklebust Cc: Dave Chinner Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton --- include/linux/iversion.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 6755d8b4f20b..f174ff1b59ee 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -9,8 +9,26 @@ * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must - * appear different to observers if there was a change to the inode's data or - * metadata since it was last queried. + * appear larger to observers if there was an explicit change to the inode's + * data or metadata since it was last queried. + * + * An explicit change is one that would ordinarily result in a change to the + * inode status change time (aka ctime). i_version must appear to change, even + * if the ctime does not (since the whole point is to avoid missing updates due + * to timestamp granularity). If POSIX or other relevant spec mandates that the + * ctime must change due to an operation, then the i_version counter must be + * incremented as well. + * + * Making the i_version update completely atomic with the operation itself would + * be prohibitively expensive. Traditionally the kernel has updated the times on + * directories after an operation that changes its contents. For regular files, + * the ctime is usually updated before the data is copied into the cache for a + * write. This means that there is a window of time when an observer can + * associate a new timestamp with old file contents. Since the purpose of the + * i_version is to allow for better cache coherency, the i_version must always + * be updated after the results of the operation are visible. Updating it before + * and after a change is also permitted. (Note that no filesystems currently do + * this. Fixing that is a work-in-progress). * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the -- cgit v1.2.3 From a1175d6b1bdaf4f74eda47ab18eb44194f9cb796 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 4 Dec 2016 09:29:46 -0500 Subject: vfs: plumb i_version handling into struct kstat The NFS server has a lot of special handling for different types of change attribute access, depending on the underlying filesystem. In most cases, it's doing a getattr anyway and then fetching that value after the fact. Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a kernel-only symbol (for now). If requested and getattr can implement it, it can fill out this field. For IS_I_VERSION inodes, add a generic implementation in vfs_getattr_nosec. Take care to mask STATX_CHANGE_COOKIE off in requests from userland and in the result mask. Since not all filesystems can give the same guarantees of monotonicity, claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to indicate that they offer an i_version value that can never go backward. Eventually if we decide to make the i_version available to userland, we can just designate a field for it in struct statx, and move the STATX_CHANGE_COOKIE definition to the uapi header. Reviewed-by: NeilBrown Reviewed-by: Jan Kara Signed-off-by: Jeff Layton --- include/linux/stat.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stat.h b/include/linux/stat.h index ff277ced50e9..52150570d37a 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -52,6 +52,15 @@ struct kstat { u64 mnt_id; u32 dio_mem_align; u32 dio_offset_align; + u64 change_cookie; }; +/* These definitions are internal to the kernel for now. Mainly used by nfsd. */ + +/* mask values */ +#define STATX_CHANGE_COOKIE 0x40000000U /* Want/got stx_change_attr */ + +/* file attribute values */ +#define STATX_ATTR_CHANGE_MONOTONIC 0x8000000000000000ULL /* version monotonically increases */ + #endif -- cgit v1.2.3 From 58a033c9a3e003e048a0431a296e58c6b363b02b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Oct 2022 06:32:23 -0400 Subject: nfsd: remove fetch_iversion export operation Now that the i_version counter is reported in struct kstat, there is no need for this export operation. Acked-by: Chuck Lever Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- include/linux/exportfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3..9f4d4bcbf251 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,7 +213,6 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); - u64 (*fetch_iversion)(struct inode *); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -- cgit v1.2.3 From b8f9301c91bff54570181db9dffa9b5716c3c75d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 13 Jan 2023 23:53:52 +0200 Subject: Revert "gpiolib: of: Introduce hook for missing gpio-ranges" This reverts commit 3550bba25d5587a701e6edf20e20984d2ee72c78. No users for this one, revert it for good. The ->add_pin_ranges() can be used instead. Signed-off-by: Andy Shevchenko Tested-by: Stefan Wahren Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230113215352.44272-5-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 44783fc16125..9adbdb1657db 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -525,18 +525,6 @@ struct gpio_chip { */ int (*of_xlate)(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, u32 *flags); - - /** - * @of_gpio_ranges_fallback: - * - * Optional hook for the case that no gpio-ranges property is defined - * within the device tree node "np" (usually DT before introduction - * of gpio-ranges). So this callback is helpful to provide the - * necessary backward compatibility for the pin ranges. - */ - int (*of_gpio_ranges_fallback)(struct gpio_chip *gc, - struct device_node *np); - #endif /* CONFIG_OF_GPIO */ }; -- cgit v1.2.3 From 9179f5fe41733ff56f7dfcb83c0c1456f6d2e4ae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 25 Jan 2023 13:02:14 +0200 Subject: net: ethtool: provide shims for stats aggregation helpers when CONFIG_ETHTOOL_NETLINK=n ethtool_aggregate_*_stats() are implemented in net/ethtool/stats.c, a file which is compiled out when CONFIG_ETHTOOL_NETLINK=n. In order to avoid adding Kbuild dependencies from drivers (which call these helpers) on CONFIG_ETHTOOL_NETLINK, let's add some shim definitions which simply make the helpers dead code. This means the function prototypes should have been located in include/linux/ethtool_netlink.h rather than include/linux/ethtool.h. Fixes: 449c5459641a ("net: ethtool: add helpers for aggregate statistics") Reported-by: kernel test robot Signed-off-by: Vladimir Oltean Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230125110214.4127759-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 11 ----------- include/linux/ethtool_netlink.h | 42 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6a8253d3fea8..515c78d8eb7c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -991,17 +991,6 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); u32 ethtool_op_get_link(struct net_device *dev); int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); -void ethtool_aggregate_mac_stats(struct net_device *dev, - struct ethtool_eth_mac_stats *mac_stats); -void ethtool_aggregate_phy_stats(struct net_device *dev, - struct ethtool_eth_phy_stats *phy_stats); -void ethtool_aggregate_ctrl_stats(struct net_device *dev, - struct ethtool_eth_ctrl_stats *ctrl_stats); -void ethtool_aggregate_pause_stats(struct net_device *dev, - struct ethtool_pause_stats *pause_stats); -void ethtool_aggregate_rmon_stats(struct net_device *dev, - struct ethtool_rmon_stats *rmon_stats); - /** * ethtool_mm_frag_size_add_to_min - Translate (standard) additional fragment * size expressed as multiplier into (absolute) minimum fragment size diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index aba348d58ff6..17003b385756 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -29,6 +29,17 @@ int ethnl_cable_test_amplitude(struct phy_device *phydev, u8 pair, s16 mV); int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV); int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last, u32 step); +void ethtool_aggregate_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats); +void ethtool_aggregate_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats); +void ethtool_aggregate_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats); +void ethtool_aggregate_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats); +void ethtool_aggregate_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats); + #else static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd) { @@ -70,5 +81,36 @@ static inline int ethnl_cable_test_step(struct phy_device *phydev, u32 first, { return -EOPNOTSUPP; } + +static inline void +ethtool_aggregate_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ +} + +static inline void +ethtool_aggregate_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ +} + +static inline void +ethtool_aggregate_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ +} + +static inline void +ethtool_aggregate_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ +} + +static inline void +ethtool_aggregate_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats) +{ +} + #endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */ #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ -- cgit v1.2.3 From c24973ed795fec5c12d8a822a0de99a4b7bab394 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 19 Jan 2023 15:09:19 +0200 Subject: gpu: host1x: Implement job tracking using DMA fences In anticipation of removal of the intr API, implement job tracking using DMA fences instead. The main two things about this are making cdma_update schedule the work since fence completion can now be called from interrupt context, and some complication in ensuring the callback is not running when we free the fence. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index dc55d9d3b94f..db6cf6f34361 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -288,8 +289,9 @@ struct host1x_job { u32 syncpt_incrs; u32 syncpt_end; - /* Completion waiter ref */ - void *waiter; + /* Completion fence for job tracking */ + struct dma_fence *fence; + struct dma_fence_cb fence_cb; /* Maximum time to wait for this job */ unsigned int timeout; -- cgit v1.2.3 From d5179020f5ce44fd449790a9c12ef6c1a90a2ca7 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 19 Jan 2023 15:09:21 +0200 Subject: gpu: host1x: External timeout/cancellation for fences Currently all fences have a 30 second timeout to ensure they are cleaned up if the fence never completes otherwise. However, this one size fits all solution doesn't actually fit in every case, such as syncpoint waiting where we want to be able to have timeouts longer than 30 seconds. As such, we want to be able to give control over fence cancellation to the caller (and maybe eventually get rid of the internal timeout altogether). Here we add this cancellation mechanism by essentially adding a function for entering the timeout path by function call, and changing the syncpoint wait function to use it. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index db6cf6f34361..9a9de4b97a25 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -222,7 +222,9 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, u32 syncpt_id); -struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout); +void host1x_fence_cancel(struct dma_fence *fence); /* * host1x channel -- cgit v1.2.3 From 3089386db0901ac6ac3d99fbd601212c98217e60 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 24 Jan 2023 13:54:57 +0200 Subject: xfrm: extend add policy callback to set failure reason Almost all validation logic is in the drivers, but they are missing reliable way to convey failure reason to userspace applications. Let's use extack to return this information to users. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aad12a179e54..7c43b9fb9aae 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1042,7 +1042,7 @@ struct xfrmdev_ops { struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); - int (*xdo_dev_policy_add) (struct xfrm_policy *x); + int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); }; -- cgit v1.2.3 From 7681a4f58fb9c338d6dfe1181607f84c793d77de Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 24 Jan 2023 13:54:59 +0200 Subject: xfrm: extend add state callback to set failure reason Almost all validation logic is in the drivers, but they are missing reliable way to convey failure reason to userspace applications. Let's use extack to return this information to users. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c43b9fb9aae..63b77cbc947e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1035,7 +1035,7 @@ struct netdev_bpf { #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { - int (*xdo_dev_state_add) (struct xfrm_state *x); + int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); void (*xdo_dev_state_delete) (struct xfrm_state *x); void (*xdo_dev_state_free) (struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, -- cgit v1.2.3 From 1ddc7618294084fff8d673217a9479550990ee84 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 23 Jan 2023 12:59:45 +0530 Subject: bus: mhi: ep: Change state_lock to mutex state_lock, the spinlock type is meant to protect race against concurrent MHI state transitions. In mhi_ep_set_m0_state(), while the state_lock is being held, the channels are resumed in mhi_ep_resume_channels() if the previous state was M3. This causes sleeping in atomic bug, since mhi_ep_resume_channels() use mutex internally. Since the state_lock is supposed to be held throughout the state change, it is not ideal to drop the lock before calling mhi_ep_resume_channels(). So to fix this issue, let's change the type of state_lock to mutex. This would also allow holding the lock throughout all state transitions thereby avoiding any potential race. Cc: # 5.19 Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels") Reported-by: Dan Carpenter Reviewed-by: Jeffrey Hugo Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi_ep.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h index 478aece17046..f198a8ac7ee7 100644 --- a/include/linux/mhi_ep.h +++ b/include/linux/mhi_ep.h @@ -70,8 +70,8 @@ struct mhi_ep_db_info { * @cmd_ctx_cache_phys: Physical address of the host command context cache * @chdb: Array of channel doorbell interrupt info * @event_lock: Lock for protecting event rings - * @list_lock: Lock for protecting state transition and channel doorbell lists * @state_lock: Lock for protecting state transitions + * @list_lock: Lock for protecting state transition and channel doorbell lists * @st_transition_list: List of state transitions * @ch_db_list: List of queued channel doorbells * @wq: Dedicated workqueue for handling rings and state changes @@ -117,8 +117,8 @@ struct mhi_ep_cntrl { struct mhi_ep_db_info chdb[4]; struct mutex event_lock; + struct mutex state_lock; spinlock_t list_lock; - spinlock_t state_lock; struct list_head st_transition_list; struct list_head ch_db_list; -- cgit v1.2.3 From 206351c5c2d9906b0304c5b10d5162707d5d4bcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:35 +0200 Subject: mfd: intel-m10-bmc: Add missing includes to header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/mfd/intel-m10-bmc.h is using: - pr_err(), thus include also linux/dev_printk.h - FIELD_GET(), this include also linux/bitfield.h - GENMASK(), thus include also linux/bits.h Signed-off-by: Ilpo Järvinen Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-2-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index f0044b14136e..0d4db5d9d5af 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -7,6 +7,9 @@ #ifndef __MFD_INTEL_M10_BMC_H #define __MFD_INTEL_M10_BMC_H +#include +#include +#include #include #define M10BMC_LEGACY_BUILD_VER 0x300468 -- cgit v1.2.3 From 16e5d95a5c451027a2e7ef89dd146a1c6c74ca6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:36 +0200 Subject: mfd: intel-m10-bmc: Create m10bmc_platform_info for type specific info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BMC type specific info is currently set by a switch/case block. The size of this info is expected to grow as more dev types and features are added which would have made the switch block bloaty. Store type specific info into struct and place them into .driver_data instead because it makes things a bit cleaner. The m10bmc_type enum can be dropped as the differentiation is now fully handled by the platform info. The info member of struct intel_m10bmc that is added here is not used yet in this change but its addition logically still belongs to this change. The CSR map change that comes after this change needs to have the info member. Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-3-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 0d4db5d9d5af..f418cad88e64 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -121,14 +121,26 @@ /* Address of 4KB inverted bit vector containing staging area FLASH count */ #define STAGING_FLASH_COUNT 0x17ffb000 +/** + * struct intel_m10bmc_platform_info - Intel MAX 10 BMC platform specific information + * @cells: MFD cells + * @n_cells: MFD cells ARRAY_SIZE() + */ +struct intel_m10bmc_platform_info { + struct mfd_cell *cells; + int n_cells; +}; + /** * struct intel_m10bmc - Intel MAX 10 BMC parent driver data structure * @dev: this device * @regmap: the regmap used to access registers by m10bmc itself + * @info: the platform information for MAX10 BMC */ struct intel_m10bmc { struct device *dev; struct regmap *regmap; + const struct intel_m10bmc_platform_info *info; }; /* -- cgit v1.2.3 From 603aed8ffd4c9cb633c05a514cfb5e8ca6b0751d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:38 +0200 Subject: mfd: intel-m10-bmc: Split into core and spi specific parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the common code from intel-m10-bmc driver into intel-m10-bmc-core and move the SPI bus parts into an interface specific file. intel-m10-bmc-core becomes the core MFD functions which can support multiple bus interface like SPI bus. Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Reviewed-by: Russ Weight Acked-by: Guenter Roeck # hwmon Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-5-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index f418cad88e64..a80deb61b69a 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -174,4 +174,10 @@ m10bmc_raw_read(struct intel_m10bmc *m10bmc, unsigned int addr, #define m10bmc_sys_read(m10bmc, offset, val) \ m10bmc_raw_read(m10bmc, M10BMC_SYS_BASE + (offset), val) +/* + * MAX10 BMC Core support + */ +int m10bmc_dev_init(struct intel_m10bmc *m10bmc, const struct intel_m10bmc_platform_info *info); +extern const struct attribute_group *m10bmc_dev_groups[]; + #endif /* __MFD_INTEL_M10_BMC_H */ -- cgit v1.2.3 From 6052a005caf9cd484fe6368a31c736ac17ebaf66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:39 +0200 Subject: mfd: intel-m10-bmc: Support multiple CSR register layouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are different addresses for the MAX10 CSR registers. Introducing a new data structure m10bmc_csr_map for the register definition of MAX10 CSR. Provide the csr_map for SPI. Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-6-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index a80deb61b69a..d569a72c7d4f 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -121,14 +121,39 @@ /* Address of 4KB inverted bit vector containing staging area FLASH count */ #define STAGING_FLASH_COUNT 0x17ffb000 +/** + * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map + */ +struct m10bmc_csr_map { + unsigned int base; + unsigned int build_version; + unsigned int fw_version; + unsigned int mac_low; + unsigned int mac_high; + unsigned int doorbell; + unsigned int auth_result; + unsigned int bmc_prog_addr; + unsigned int bmc_reh_addr; + unsigned int bmc_magic; + unsigned int sr_prog_addr; + unsigned int sr_reh_addr; + unsigned int sr_magic; + unsigned int pr_prog_addr; + unsigned int pr_reh_addr; + unsigned int pr_magic; + unsigned int rsu_update_counter; +}; + /** * struct intel_m10bmc_platform_info - Intel MAX 10 BMC platform specific information * @cells: MFD cells * @n_cells: MFD cells ARRAY_SIZE() + * @csr_map: the mappings for register definition of MAX10 BMC */ struct intel_m10bmc_platform_info { struct mfd_cell *cells; int n_cells; + const struct m10bmc_csr_map *csr_map; }; /** @@ -167,12 +192,17 @@ m10bmc_raw_read(struct intel_m10bmc *m10bmc, unsigned int addr, * The base of the system registers could be configured by HW developers, and * in HW SPEC, the base is not added to the addresses of the system registers. * - * This macro helps to simplify the accessing of the system registers. And if + * This function helps to simplify the accessing of the system registers. And if * the base is reconfigured in HW, SW developers could simply change the - * M10BMC_SYS_BASE accordingly. + * csr_map's base accordingly. */ -#define m10bmc_sys_read(m10bmc, offset, val) \ - m10bmc_raw_read(m10bmc, M10BMC_SYS_BASE + (offset), val) +static inline int m10bmc_sys_read(struct intel_m10bmc *m10bmc, unsigned int offset, + unsigned int *val) +{ + const struct m10bmc_csr_map *csr_map = m10bmc->info->csr_map; + + return m10bmc_raw_read(m10bmc, csr_map->base + offset, val); +} /* * MAX10 BMC Core support -- cgit v1.2.3 From bcababfc60ccc622268b2317a22fabd879fbc0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:41 +0200 Subject: mfd: intel-m10-bmc: Prefix register defines with M10BMC_N3000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefix the M10BMC defines register defines with M10BMC_N3000 to make it more obvious these are related to some board type. All current non-N3000 board types have the same layout so they'll be reused. The less generic makes it more obvious they're not meant for the generic/interface agnostic code. Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-8-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 66 +++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index d569a72c7d4f..470dc3773c01 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -12,38 +12,38 @@ #include #include -#define M10BMC_LEGACY_BUILD_VER 0x300468 -#define M10BMC_SYS_BASE 0x300800 -#define M10BMC_SYS_END 0x300fff -#define M10BMC_FLASH_BASE 0x10000000 -#define M10BMC_FLASH_END 0x1fffffff -#define M10BMC_MEM_END M10BMC_FLASH_END +#define M10BMC_N3000_LEGACY_BUILD_VER 0x300468 +#define M10BMC_N3000_SYS_BASE 0x300800 +#define M10BMC_N3000_SYS_END 0x300fff +#define M10BMC_N3000_FLASH_BASE 0x10000000 +#define M10BMC_N3000_FLASH_END 0x1fffffff +#define M10BMC_N3000_MEM_END M10BMC_N3000_FLASH_END #define M10BMC_STAGING_BASE 0x18000000 #define M10BMC_STAGING_SIZE 0x3800000 /* Register offset of system registers */ -#define NIOS2_FW_VERSION 0x0 -#define M10BMC_MAC_LOW 0x10 -#define M10BMC_MAC_BYTE4 GENMASK(7, 0) -#define M10BMC_MAC_BYTE3 GENMASK(15, 8) -#define M10BMC_MAC_BYTE2 GENMASK(23, 16) -#define M10BMC_MAC_BYTE1 GENMASK(31, 24) -#define M10BMC_MAC_HIGH 0x14 -#define M10BMC_MAC_BYTE6 GENMASK(7, 0) -#define M10BMC_MAC_BYTE5 GENMASK(15, 8) -#define M10BMC_MAC_COUNT GENMASK(23, 16) -#define M10BMC_TEST_REG 0x3c -#define M10BMC_BUILD_VER 0x68 -#define M10BMC_VER_MAJOR_MSK GENMASK(23, 16) -#define M10BMC_VER_PCB_INFO_MSK GENMASK(31, 24) -#define M10BMC_VER_LEGACY_INVALID 0xffffffff +#define NIOS2_N3000_FW_VERSION 0x0 +#define M10BMC_N3000_MAC_LOW 0x10 +#define M10BMC_N3000_MAC_BYTE4 GENMASK(7, 0) +#define M10BMC_N3000_MAC_BYTE3 GENMASK(15, 8) +#define M10BMC_N3000_MAC_BYTE2 GENMASK(23, 16) +#define M10BMC_N3000_MAC_BYTE1 GENMASK(31, 24) +#define M10BMC_N3000_MAC_HIGH 0x14 +#define M10BMC_N3000_MAC_BYTE6 GENMASK(7, 0) +#define M10BMC_N3000_MAC_BYTE5 GENMASK(15, 8) +#define M10BMC_N3000_MAC_COUNT GENMASK(23, 16) +#define M10BMC_N3000_TEST_REG 0x3c +#define M10BMC_N3000_BUILD_VER 0x68 +#define M10BMC_N3000_VER_MAJOR_MSK GENMASK(23, 16) +#define M10BMC_N3000_VER_PCB_INFO_MSK GENMASK(31, 24) +#define M10BMC_N3000_VER_LEGACY_INVALID 0xffffffff /* Secure update doorbell register, in system register region */ -#define M10BMC_DOORBELL 0x400 +#define M10BMC_N3000_DOORBELL 0x400 /* Authorization Result register, in system register region */ -#define M10BMC_AUTH_RESULT 0x404 +#define M10BMC_N3000_AUTH_RESULT 0x404 /* Doorbell register fields */ #define DRBL_RSU_REQUEST BIT(0) @@ -106,20 +106,20 @@ #define RSU_COMPLETE_TIMEOUT_MS (40 * 60 * 1000) /* Addresses for security related data in FLASH */ -#define BMC_REH_ADDR 0x17ffc004 -#define BMC_PROG_ADDR 0x17ffc000 -#define BMC_PROG_MAGIC 0x5746 +#define M10BMC_N3000_BMC_REH_ADDR 0x17ffc004 +#define M10BMC_N3000_BMC_PROG_ADDR 0x17ffc000 +#define M10BMC_N3000_BMC_PROG_MAGIC 0x5746 -#define SR_REH_ADDR 0x17ffd004 -#define SR_PROG_ADDR 0x17ffd000 -#define SR_PROG_MAGIC 0x5253 +#define M10BMC_N3000_SR_REH_ADDR 0x17ffd004 +#define M10BMC_N3000_SR_PROG_ADDR 0x17ffd000 +#define M10BMC_N3000_SR_PROG_MAGIC 0x5253 -#define PR_REH_ADDR 0x17ffe004 -#define PR_PROG_ADDR 0x17ffe000 -#define PR_PROG_MAGIC 0x5250 +#define M10BMC_N3000_PR_REH_ADDR 0x17ffe004 +#define M10BMC_N3000_PR_PROG_ADDR 0x17ffe000 +#define M10BMC_N3000_PR_PROG_MAGIC 0x5250 /* Address of 4KB inverted bit vector containing staging area FLASH count */ -#define STAGING_FLASH_COUNT 0x17ffb000 +#define M10BMC_N3000_STAGING_FLASH_COUNT 0x17ffb000 /** * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map -- cgit v1.2.3 From 001a734a55d09aa1716eb2cd5ccab8b4d7a068a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:43 +0200 Subject: fpga: m10bmc-sec: Make rsu status type specific MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rsu status field moves from the doorbell register to the auth result register in the PMCI implementation of the MAX10 BMC. In order to prepare for that, refactor the sec update driver code to have a type specific ops that provides ->rsu_status(). Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Co-developed-by: Russ Weight Signed-off-by: Russ Weight Signed-off-by: Ilpo Järvinen Acked-by: Xu Yilun Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-10-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 470dc3773c01..1f75b33240ad 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -91,7 +91,6 @@ #define HOST_STATUS_ABORT_RSU 0x2 #define rsu_prog(doorbell) FIELD_GET(DRBL_RSU_PROGRESS, doorbell) -#define rsu_stat(doorbell) FIELD_GET(DRBL_RSU_STATUS, doorbell) /* interval 100ms and timeout 5s */ #define NIOS_HANDSHAKE_INTERVAL_US (100 * 1000) -- cgit v1.2.3 From 869b9eddf0b38a22c27a400e2fa849d2ff2aa7e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:44 +0200 Subject: mfd: intel-m10-bmc: Add PMCI driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the mfd driver for the Platform Management Component Interface (PMCI) based interface of Intel MAX10 BMC controller. PMCI is a software-visible interface, connected to card BMC which provided the basic functionality of read/write BMC register. The access to the register is done indirectly via a hardware controller/bridge that handles read/write/clear commands and acknowledgments for the commands. Previously, intel-m10-bmc provided sysfs under /sys/bus/spi/devices/... which is generalized in this change because not all MAX10 BMC appear under SPI anymore. Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Co-developed-by: Russ Weight Signed-off-by: Russ Weight Co-developed-by: Matthew Gerlach Signed-off-by: Matthew Gerlach Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-11-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 1f75b33240ad..810534b1bd12 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -120,6 +120,34 @@ /* Address of 4KB inverted bit vector containing staging area FLASH count */ #define M10BMC_N3000_STAGING_FLASH_COUNT 0x17ffb000 +#define M10BMC_N6000_INDIRECT_BASE 0x400 + +#define M10BMC_N6000_SYS_BASE 0x0 +#define M10BMC_N6000_SYS_END 0xfff + +#define M10BMC_N6000_DOORBELL 0x1c0 +#define M10BMC_N6000_AUTH_RESULT 0x1c4 + +#define M10BMC_N6000_BUILD_VER 0x0 +#define NIOS2_N6000_FW_VERSION 0x4 +#define M10BMC_N6000_MAC_LOW 0x20 +#define M10BMC_N6000_MAC_HIGH (M10BMC_N6000_MAC_LOW + 4) + +/* Addresses for security related data in FLASH */ +#define M10BMC_N6000_BMC_REH_ADDR 0x7ffc004 +#define M10BMC_N6000_BMC_PROG_ADDR 0x7ffc000 +#define M10BMC_N6000_BMC_PROG_MAGIC 0x5746 + +#define M10BMC_N6000_SR_REH_ADDR 0x7ffd004 +#define M10BMC_N6000_SR_PROG_ADDR 0x7ffd000 +#define M10BMC_N6000_SR_PROG_MAGIC 0x5253 + +#define M10BMC_N6000_PR_REH_ADDR 0x7ffe004 +#define M10BMC_N6000_PR_PROG_ADDR 0x7ffe000 +#define M10BMC_N6000_PR_PROG_MAGIC 0x5250 + +#define M10BMC_N6000_STAGING_FLASH_COUNT 0x7ff5000 + /** * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map */ -- cgit v1.2.3 From abc3100fcba6827444ef4bdb17065ac3b6619dff Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 20 Jan 2023 12:45:17 +0100 Subject: leds: led-class: Add generic [devm_]led_get() Add a generic [devm_]led_get() method which can be used on both devicetree and non devicetree platforms to get a LED classdev associated with a specific function on a specific device, e.g. the privacy LED associated with a specific camera sensor. Note unlike of_led_get() this takes a string describing the function rather then an index. This is done because e.g. camera sensors might have a privacy LED, or a flash LED, or both and using an index approach leaves it unclear what the function of index 0 is if there is only 1 LED. This uses a lookup-table mechanism for non devicetree platforms. This allows the platform code to map specific LED class_dev-s to a specific device,function combinations this way. For devicetree platforms getting the LED by function-name could be made to work using the standard devicetree pattern of adding a -names string array to map names to the indexes. Reviewed-by: Linus Walleij Signed-off-by: Hans de Goede Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230120114524.408368-5-hdegoede@redhat.com --- include/linux/leds.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index ba4861ec73d3..31cb74b90ffc 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -39,6 +39,21 @@ enum led_default_state { LEDS_DEFSTATE_KEEP = 2, }; +/** + * struct led_lookup_data - represents a single LED lookup entry + * + * @list: internal list of all LED lookup entries + * @provider: name of led_classdev providing the LED + * @dev_id: name of the device associated with this LED + * @con_id: name of the LED from the device's point of view + */ +struct led_lookup_data { + struct list_head list; + const char *provider; + const char *dev_id; + const char *con_id; +}; + struct led_init_data { /* device fwnode handle */ struct fwnode_handle *fwnode; @@ -211,6 +226,12 @@ void devm_led_classdev_unregister(struct device *parent, void led_classdev_suspend(struct led_classdev *led_cdev); void led_classdev_resume(struct led_classdev *led_cdev); +void led_add_lookup(struct led_lookup_data *led_lookup); +void led_remove_lookup(struct led_lookup_data *led_lookup); + +struct led_classdev *__must_check led_get(struct device *dev, char *con_id); +struct led_classdev *__must_check devm_led_get(struct device *dev, char *con_id); + extern struct led_classdev *of_led_get(struct device_node *np, int index); extern void led_put(struct led_classdev *led_cdev); struct led_classdev *__must_check devm_of_led_get(struct device *dev, -- cgit v1.2.3 From ac62f60619fa5b53144fefdca6d2a219125a0228 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:14 -0800 Subject: net: add missing includes of linux/net.h linux/net.h will soon not be included by linux/skbuff.h. Fix the cases where source files were depending on the implicit include. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 78890143f079..b19d3284551f 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -15,6 +15,7 @@ #include #include #include +#include #include static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) -- cgit v1.2.3 From 9a859da287870715a22ce05d6ae377ae8ac79cc3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:15 -0800 Subject: net: skbuff: drop the linux/net.h include It appears nothing needs it. The kernel builds fine with this include removed, building an otherwise empty source file with: #include #ifdef _LINUX_NET_H #error linux/net.h is back #endif works too (meaning net.h is not just pulled in indirectly). This gives us a slight 0.5% reduction in the pre-processed size of skbuff.h. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4c8492401a10..b93818e11da0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 2195e2a024aef567aea6ea0b0dab52f77bcc7b55 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:17 -0800 Subject: net: skbuff: drop the linux/textsearch.h include This include was added for skb_find_text() but all we need there is a forward declaration of struct ts_config. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b93818e11da0..7eeb06f9ca1f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -279,6 +278,7 @@ struct napi_struct; struct bpf_prog; union bpf_attr; struct skb_ext; +struct ts_config; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { -- cgit v1.2.3 From 2870c4d6a5e479659cb84992717189634c1e71e0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:18 -0800 Subject: net: add missing includes of linux/sched/clock.h Number of files depend on linux/sched/clock.h getting included by linux/skbuff.h which soon will no longer be the case. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ccc4a4a58c72..1727898f1641 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 9ac849f2c4925a29749d27586790f8365ee91889 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:19 -0800 Subject: net: skbuff: drop the linux/sched/clock.h include It used to be necessary for skb_mstamp_* static inlines, but those are gone since we moved to usec timestamps in TCP, in 2017. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7eeb06f9ca1f..aa920591ba37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 422164224e32525f88aa9633dea176484fe2c50c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:20 -0800 Subject: net: skbuff: drop the linux/sched.h include linux/sched.h was added for skb_mstamp_* (all the way back before linux/sched.h got split and linux/sched/clock.h created). We don't need it in skbuff.h any more. Sadly this change is currently a noop because linux/dma-mapping.h and net/page_pool.h pull in half of the universe. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa920591ba37..a1978adc5644 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 5255c0ca798347fbfa9e2ec24276d6515cf2a7e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:22 -0800 Subject: net: skbuff: drop the linux/splice.h include splice.h is included since commit a60e3cc7c929 ("net: make skb_splice_bits more configureable") but really even then all we needed is some forward declarations. Most of that code is now gone, and remaining has fwd declarations. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a1978adc5644..c6fd5d5b50e0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 9dd0db2b1303a489dd9a6d6de499fa89e6f88b93 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:23 -0800 Subject: net: skbuff: drop the linux/hrtimer.h include linux/hrtimer.h include was added because apparently it used to contain ktime related code. This is no longer the case and we include linux/time.h explicitly. Sadly this change is currently a noop because linux/dma-mapping.h and net/page_pool.h pull in half of the universe. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6fd5d5b50e0..5ba12185f43e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From a77ad4bf792652340ab334956e69b46ec2fdaefb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:03 +0100 Subject: of: device: make of_device_uevent_modalias() take a const device * of_device_uevent_modalias() does not modify the device pointer passed to it, so mark it constant. In order to properly do this, a number of busses need to have a modalias function added as they were attempting to just point to of_device_uevent_modalias instead of their bus-specific modalias function. This is fine except if the prototype for a bus and device type modalias function diverges and then problems could happen. To prevent all of that, just wrap the call to of_device_uevent_modalias() directly for each bus and device type individually. Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Chen-Yu Tsai Cc: Jernej Skrabec Cc: Samuel Holland Cc: David Airlie Cc: Daniel Vetter Cc: Benjamin Herrenschmidt Cc: Rob Herring Cc: Frank Rowand Cc: Liang He Cc: Thomas Gleixner Cc: Christophe JAILLET Cc: Thomas Zimmermann Cc: Dmitry Baryshkov Cc: Douglas Anderson Cc: Lyude Paul Cc: Corentin Labbe Cc: Zou Wei Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@lists.linux.dev Cc: dri-devel@lists.freedesktop.org Cc: devicetree@vger.kernel.org Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/of_device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index ab7d557d541d..f4b57614979d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -36,7 +36,7 @@ extern ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len); extern int of_device_request_module(struct device *dev); extern void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env); -extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env); +extern int of_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env); static inline struct device_node *of_cpu_device_node_get(int cpu) { @@ -83,7 +83,7 @@ static inline int of_device_request_module(struct device *dev) return -ENODEV; } -static inline int of_device_uevent_modalias(struct device *dev, +static inline int of_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; -- cgit v1.2.3 From fa838c8ce53714eba788ea877520788bc72c27bd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:04 +0100 Subject: i3c: move dev_to_i3cdev() to use container_of_const() The driver core is changing to pass some pointers as const, so move dev_to_i3cdev() to use container_of_const() to handle this change. dev_to_i3cdev() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Acked-by: Alexandre Belloni Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/i3c/device.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 1c997abe868c..68b558929aec 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -186,7 +186,14 @@ static inline struct i3c_driver *drv_to_i3cdrv(struct device_driver *drv) } struct device *i3cdev_to_dev(struct i3c_device *i3cdev); -struct i3c_device *dev_to_i3cdev(struct device *dev); + +/** + * dev_to_i3cdev() - Returns the I3C device containing @dev + * @dev: device object + * + * Return: a pointer to an I3C device object. + */ +#define dev_to_i3cdev(__dev) container_of_const(__dev, struct i3c_device, dev) const struct i3c_device_id * i3c_device_match_id(struct i3c_device *i3cdev, -- cgit v1.2.3 From 34be683add6ccdbc9d589b79077ea7f48ff7ee0c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:05 +0100 Subject: platform/surface: aggregator: move to_ssam_device() to use container_of_const() The driver core is changing to pass some pointers as const, so move to_ssam_device() to use container_of_const() to handle this change. to_ssam_device() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Reviewed-by: Maximilian Luz Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/surface_aggregator/device.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 46c45d1b6368..24151a0e2c96 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -229,10 +229,7 @@ static inline bool is_ssam_device(struct device *d) * Return: Returns a pointer to the &struct ssam_device wrapping the given * device @d. */ -static inline struct ssam_device *to_ssam_device(struct device *d) -{ - return container_of(d, struct ssam_device, dev); -} +#define to_ssam_device(d) container_of_const(d, struct ssam_device, dev) /** * to_ssam_device_driver() - Casts the given device driver to a SSAM client -- cgit v1.2.3 From 49b7fc1c25481c823e391b5617546b1ad4af0852 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:06 +0100 Subject: firewire: move fw_device() and fw_unit() to use container_of_const() The driver core is changing to pass some pointers as const, so move fw_device() and fw_unit() functions to use container_of_const() to handle this change. fw_device() and fw_unit() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. This also required turning fw_parent_device() into a macro to preserve the const-ness of the pointer passed into it if necessary. Cc: Stefan Richter Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firewire.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 980019053e54..03ac31ada5e6 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -208,10 +208,7 @@ struct fw_device { struct fw_attribute_group attribute_group; }; -static inline struct fw_device *fw_device(struct device *dev) -{ - return container_of(dev, struct fw_device, device); -} +#define fw_device(dev) container_of_const(dev, struct fw_device, device) static inline int fw_device_is_shutdown(struct fw_device *device) { @@ -229,10 +226,7 @@ struct fw_unit { struct fw_attribute_group attribute_group; }; -static inline struct fw_unit *fw_unit(struct device *dev) -{ - return container_of(dev, struct fw_unit, device); -} +#define fw_unit(dev) container_of_const(dev, struct fw_unit, device) static inline struct fw_unit *fw_unit_get(struct fw_unit *unit) { @@ -246,10 +240,7 @@ static inline void fw_unit_put(struct fw_unit *unit) put_device(&unit->device); } -static inline struct fw_device *fw_parent_device(struct fw_unit *unit) -{ - return fw_device(unit->device.parent); -} +#define fw_parent_device(unit) fw_device(unit->device.parent) struct ieee1394_device_id; -- cgit v1.2.3 From 162736b0d71a9630f7c99dda7cefd5600fa03d69 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:07 +0100 Subject: driver core: make struct device_type.uevent() take a const * The uevent() callback in struct device_type should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Cc: Alan Stern Cc: Andreas Noever Cc: Andy Shevchenko Cc: Bard Liao Cc: Chaitanya Kulkarni Cc: Dan Williams Cc: Dmitry Torokhov Cc: Frank Rowand Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Jilin Yuan Cc: Jiri Slaby Cc: Len Brown Cc: Mark Gross Cc: "Martin K. Petersen" Cc: "Matthew Wilcox (Oracle)" Cc: Maximilian Luz Cc: Michael Jamet Cc: Ming Lei Cc: Pierre-Louis Bossart Cc: Rob Herring Cc: Sakari Ailus Cc: Sanyog Kale Cc: Sean Young Cc: Stefan Richter Cc: Thomas Gleixner Cc: Won Chung Cc: Yehezkel Bernat Acked-by: Rafael J. Wysocki Acked-by: Mika Westerberg # for Thunderbolt Acked-by: Mauro Carvalho Chehab Acked-by: Alexandre Belloni Acked-by: Heikki Krogerus Acked-by: Wolfram Sang Acked-by: Vinod Koul Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-6-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 4 ++-- include/linux/device.h | 2 +- include/linux/i3c/device.h | 2 +- include/linux/soundwire/sdw_type.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5e6a876e17ba..564b62f13bd0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -723,7 +723,7 @@ const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const void *acpi_device_get_match_data(const struct device *dev); extern bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv); -int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); +int acpi_device_uevent_modalias(const struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); struct platform_device *acpi_create_platform_device(struct acpi_device *, @@ -958,7 +958,7 @@ static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, return NULL; } -static inline int acpi_device_uevent_modalias(struct device *dev, +static inline int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; diff --git a/include/linux/device.h b/include/linux/device.h index 44e3acae7b36..dad0614aad96 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -88,7 +88,7 @@ int subsys_virtual_register(struct bus_type *subsys, struct device_type { const char *name; const struct attribute_group **groups; - int (*uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); void (*release)(struct device *dev); diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 68b558929aec..ce115ef08fec 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -303,7 +303,7 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev, int i3c_device_do_setdasa(struct i3c_device *dev); -void i3c_device_get_info(struct i3c_device *dev, struct i3c_device_info *info); +void i3c_device_get_info(const struct i3c_device *dev, struct i3c_device_info *info); struct i3c_ibi_payload { unsigned int len; diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h index 52eb66cd11bc..d8c27f1e5559 100644 --- a/include/linux/soundwire/sdw_type.h +++ b/include/linux/soundwire/sdw_type.h @@ -21,7 +21,7 @@ static inline int is_sdw_slave(const struct device *dev) int __sdw_register_driver(struct sdw_driver *drv, struct module *owner); void sdw_unregister_driver(struct sdw_driver *drv); -int sdw_slave_uevent(struct device *dev, struct kobj_uevent_env *env); +int sdw_slave_uevent(const struct device *dev, struct kobj_uevent_env *env); /** * module_sdw_driver() - Helper macro for registering a Soundwire driver -- cgit v1.2.3 From a9b12f8b4e3309c4c25d39e7ab818943b9c48c1c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:08 +0100 Subject: driver core: make struct device_type.devnode() take a const * The devnode() callback in struct device_type should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Cc: Jens Axboe Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Jeremy Kerr Cc: Joel Stanley Cc: Alistar Popple Cc: Eddie James Cc: Jonathan Cameron Cc: Jilin Yuan Cc: Heikki Krogerus Cc: Alan Stern Cc: Andy Shevchenko Cc: Thomas Gleixner Cc: Jason Gunthorpe Cc: "Rafael J. Wysocki" Cc: Won Chung Acked-by: Dan Williams Acked-by: Alexander Shishkin Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-7-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index dad0614aad96..318861000b83 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -89,7 +89,7 @@ struct device_type { const char *name; const struct attribute_group **groups; int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, umode_t *mode, + char *(*devnode)(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); void (*release)(struct device *dev); -- cgit v1.2.3 From 42bb5be8936f40a1d0e618766645e7fd0cbfe591 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:09 +0100 Subject: driver core: device_get_devnode() should take a const * device_get_devnode() should take a constant * to struct device as it does not modify it in any way, so modify the function definition to do this and move it out of device.h as it does not need to be exposed to the whole kernel tree. Cc: Thomas Gleixner Cc: Jason Gunthorpe Cc: Ira Weiny Cc: Dan Williams Cc: Won Chung Acked-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 318861000b83..90aaf308c259 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -907,8 +907,6 @@ int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); -const char *device_get_devnode(struct device *dev, umode_t *mode, kuid_t *uid, - kgid_t *gid, const char **tmp); int device_is_dependent(struct device *dev, void *target); static inline bool device_supports_offline(struct device *dev) -- cgit v1.2.3 From 8afdae83e318f3cfb6eb1ee2ce289356936a663c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:12 +0100 Subject: drivers: hv: move device_to_hv_device to use container_of_const() The driver core is changing to pass some pointers as const, so move device_to_hv_device() to use container_of_const() to handle this change. device_to_hv_device() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Dexuan Cui Acked-by: Wei Liu Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-11-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 85f7c5a63aa6..8430e27f3c3f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1309,10 +1309,7 @@ struct hv_device { }; -static inline struct hv_device *device_to_hv_device(struct device *d) -{ - return container_of(d, struct hv_device, device); -} +#define device_to_hv_device(d) container_of_const(d, struct hv_device, device) static inline struct hv_driver *drv_to_hv_drv(struct device_driver *d) { -- cgit v1.2.3 From af3011b6637cd7e94b2420455d41ba9e66c03aed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:13 +0100 Subject: virtio: move dev_to_virtio() to use container_of_const() The driver core is changing to pass some pointers as const, so move dev_to_virtio() to use container_of_const() to handle this change. dev_to_virtio() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Cc: "Michael S. Tsirkin" Cc: Jason Wang Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-12-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/virtio.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index dcab9c7e8784..2b472514c49b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -127,10 +127,7 @@ struct virtio_device { void *priv; }; -static inline struct virtio_device *dev_to_virtio(struct device *_dev) -{ - return container_of(_dev, struct virtio_device, dev); -} +#define dev_to_virtio(_dev) container_of_const(_dev, struct virtio_device, dev) void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); -- cgit v1.2.3 From deaf8b21c803b3932071a7cf9b11f36e13000f5c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:15 +0100 Subject: mcb: move to_mcb_device() to use container_of_const() The driver core is changing to pass some pointers as const, so move to_mcb_device() to use container_of_const() to handle this change. to_mcb_device() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Cc: Johannes Thumshirn Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-14-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index f6efb16f9d1b..1e5893138afe 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -76,10 +76,7 @@ struct mcb_device { struct device *dma_dev; }; -static inline struct mcb_device *to_mcb_device(struct device *dev) -{ - return container_of(dev, struct mcb_device, dev); -} +#define to_mcb_device(__dev) container_of_const(__dev, struct mcb_device, dev) /** * struct mcb_driver - MEN Chameleon Bus device driver -- cgit v1.2.3 From 2a81ada32f0e584fc0c943e0d3a8c9f4fae411d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:17 +0100 Subject: driver core: make struct bus_type.uevent() take a const * The uevent() callback in struct bus_type should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Acked-by: Rafael J. Wysocki Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-16-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 +- include/linux/spi/spi.h | 2 +- include/linux/ssb/ssb.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 7b4a48b5159b..87e4d029c915 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -90,7 +90,7 @@ struct bus_type { const struct attribute_group **drv_groups; int (*match)(struct device *dev, struct device_driver *drv); - int (*uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); int (*probe)(struct device *dev); void (*sync_state)(struct device *dev); void (*remove)(struct device *dev); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 9a32495fbb1f..2edf7f09239e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -223,7 +223,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(struct device *dev) +static inline struct spi_device *to_spi_device(const struct device *dev) { return dev ? container_of(dev, struct spi_device, dev) : NULL; } diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index f9b53acb4e02..1f326da289d3 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -285,7 +285,7 @@ struct ssb_device { /* Go from struct device to struct ssb_device. */ static inline -struct ssb_device * dev_to_ssb_dev(struct device *dev) +struct ssb_device * dev_to_ssb_dev(const struct device *dev) { struct __ssb_dev_wrapper *wrap; wrap = container_of(dev, struct __ssb_dev_wrapper, dev); -- cgit v1.2.3 From 56d5f362ad0f8f60bc7c6fd5d7bc2b528d6963f5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:18 +0100 Subject: kobject: kset_uevent_ops: make uevent() callback take a const * The uevent() callback in struct kset_uevent_ops does not modify the kobject passed into it, so make the pointer const to enforce this restriction. When doing so, fix up all existing uevent() callbacks to have the correct signature to preserve the build. Cc: Christine Caulfield Cc: David Teigland Cc: Bob Peterson Cc: Andreas Gruenbacher Acked-by: Rafael J. Wysocki Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-17-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58a5b75612e3..bdab370a24f4 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -137,7 +137,7 @@ struct kobj_uevent_env { struct kset_uevent_ops { int (* const filter)(const struct kobject *kobj); const char *(* const name)(const struct kobject *kobj); - int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); + int (* const uevent)(const struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { -- cgit v1.2.3 From 47d586913f2abec4d240bae33417f537fda987ec Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 16 Jan 2023 20:14:25 +0100 Subject: fs: Use CHECK_DATA_CORRUPTION() when kernel bugs are detected Currently, filp_close() and generic_shutdown_super() use printk() to log messages when bugs are detected. This is problematic because infrastructure like syzkaller has no idea that this message indicates a bug. In addition, some people explicitly want their kernels to BUG() when kernel data corruption has been detected (CONFIG_BUG_ON_DATA_CORRUPTION). And finally, when generic_shutdown_super() detects remaining inodes on a system without CONFIG_BUG_ON_DATA_CORRUPTION, it would be nice if later accesses to a busy inode would at least crash somewhat cleanly rather than walking through freed memory. To address all three, use CHECK_DATA_CORRUPTION() when kernel bugs are detected. Signed-off-by: Jann Horn Reviewed-by: Christian Brauner (Microsoft) Reviewed-by: Kees Cook Signed-off-by: Christian Brauner (Microsoft) --- include/linux/poison.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 2d3249eb0e62..0e8a1f2ceb2f 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -84,4 +84,7 @@ /********** kernel/bpf/ **********/ #define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) +/********** VFS **********/ +#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) + #endif -- cgit v1.2.3 From 37fe46051dc94c589b616018ba9d2fd4bacfbd8a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:13:43 +0300 Subject: dmaengine: Fix dma_slave_config.dst_addr description The dst_addr member of the dma_slave_config structure has been mistakenly marked as ignored if the *source* address belongs to the memory. That is relevant to the src_addr field of the structure, while the dst_addr field contains a destination device address that should be ignored if the destination is the CPU memory. Correct the @dst_addr description accordingly. Link: https://lore.kernel.org/r/20230113171409.30470-2-Sergey.Semin@baikalelectronics.ru Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c923f4e60f24..0c020682d894 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -394,7 +394,7 @@ enum dma_slave_buswidth { * should be read (RX), if the source is memory this argument is * ignored. * @dst_addr: this is the physical address where DMA slave data - * should be written (TX), if the source is memory this argument + * should be written (TX), if the destination is memory this argument * is ignored. * @src_addr_width: this is the width in bytes of the source (RX) * register where DMA data shall be read. If the source -- cgit v1.2.3 From 002bbaa2f60e07d465f92a163365569481d34108 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:13:45 +0300 Subject: dmaengine: dw-edma: Convert ll/dt phys address to PCI bus/DMA address The dw_edma_region.paddr field should be a memory base address visible by the DW eDMA controller. If the DMA engine is embedded in the DW PCIe Host/Endpoint controller, the address should belong to the Local CPU/ Application memory. If eDMA is remotely accessible across the PCI bus via PCI memory IOs, the address should be part of the PCI bus memory space. The latter case hasn't been well covered in the corresponding glue-driver. Since pci_dev.resource[] contains resources defined in the CPU memory space, they need to be converted to the PCI bus address space. Convert the LL, DT and CSRs PCI memory ranges with pci_bus_address(). In addition, extend the dw_edma_region.paddr field size. The field normally contains a memory range base address to be set in the DW eDMA Linked-List pointer register or as a base address of the Linked-List data buffer. In accordance with [1] the LL range is supposed to be created in the Local CPU/Application memory, but depending on the DW eDMA utilization the memory can be created as a part of the PCI bus address space (as in the case of the DW PCIe Endpoint prototype kit). In the former case dw_edma_region.paddr should be a dma_addr_t, while in the latter one it should be a pci_bus_addr_t. Since the corresponding CSRs are always 64 bits wide, convert dw_edma_region.paddr to be u64, and let the client make sure it has a valid address visible by the DW eDMA controller. For instance, the DW eDMA PCIe glue-driver initializes the field with addresses from the PCI bus memory space. [1] DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, v.5.40a, March 2019, p.1103 Link: https://lore.kernel.org/r/20230113171409.30470-4-Sergey.Semin@baikalelectronics.ru Fixes: 41aaff2a2ac0 ("dmaengine: Add Synopsys eDMA IP PCIe glue-logic") Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dma/edma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 7d8062e9c544..a864978ddd27 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -18,7 +18,7 @@ struct dw_edma; struct dw_edma_region { - phys_addr_t paddr; + u64 paddr; void __iomem *vaddr; size_t sz; }; -- cgit v1.2.3 From 993d57bbaacf7ad7a742e51cb717e21e0eb4ef72 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:13:49 +0300 Subject: dmaengine: dw-edma: Add CPU to PCI bus address translation Since 9575632052ba ("dmaengine: make slave address physical"), the source and destination addresses of the DMA slave device have been converted to physical addresses in the CPU address space. It's the DMA device driver's responsibility to convert them to the DMA bus address space. In case of the DW eDMA device, the source or destination peripheral (slave) devices reside in PCI bus space. Thus we need to perform the PCI Host/Endpoint windows- based (i.e. DT "ranges" property) address translation; otherwise the eDMA transactions won't work as expected (or can be even harmful) if the CPU and PCI address spaces don't match. Note 1: Even though the DMA interleaved template has both source and destination addresses declared as dma_addr_t, only the CPU memory range should be mapped to be seen by the DMA device since it's a subject of the DMA getting towards the system side. The device part must not be mapped since the slave device resides in the PCI bus space, which isn't affected by IOMMUs or iATU translations. DW PCIe eDMA generates corresponding MWr/MRd TLPs on its own. Note 2: This functionality is mainly required for the remote eDMA setup since the CPU address must be manually translated into the PCI bus space before being written to LLI.{SAR,DAR}. If eDMA is embedded in the locally accessible DW PCIe Root Port/Endpoint, software-based translation isn't required since hardware will translate it via the Outbound iATU as long as the DMA_BYPASS flag is cleared. If DMA_BYPASS is set or there is no Outbound iATU entry that contains the SAR or DAR (for Read and Write channel respectively), there won't be any translation performed but DMA will proceed with the corresponding source/destination address as-is. Link: https://lore.kernel.org/r/20230113171409.30470-8-Sergey.Semin@baikalelectronics.ru Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dma/edma.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index a864978ddd27..07a23ecc834f 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -23,8 +23,23 @@ struct dw_edma_region { size_t sz; }; +/** + * struct dw_edma_core_ops - platform-specific eDMA methods + * @irq_vector: Get IRQ number of the passed eDMA channel. Note the + * method accepts the channel id in the end-to-end + * numbering with the eDMA write channels being placed + * first in the row. + * @pci_address: Get PCIe bus address corresponding to the passed CPU + * address. Note there is no need in specifying this + * function if the address translation is performed by + * the DW PCIe RP/EP controller with the DW eDMA device in + * subject and DMA_BYPASS isn't set for all the outbound + * iATU windows. That will be done by the controller + * automatically. + */ struct dw_edma_core_ops { int (*irq_vector)(struct device *dev, unsigned int nr); + u64 (*pci_address)(struct device *dev, phys_addr_t cpu_addr); }; enum dw_edma_map_format { -- cgit v1.2.3 From 18b1746bddf5e7f6b2618966596d9517172a5cd7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 26 Jan 2023 00:28:03 +0200 Subject: RDMA/mlx5: Remove implicit ODP cache entry Implicit ODP mkey doesn't have unique properties. It shares the same properties as the order 18 cache entry. There is no need to devote a special entry for that. Link: https://lore.kernel.org/r/20230125222807.6921-3-michaelgur@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f..f79c20d50eb4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -734,7 +734,6 @@ enum { enum { MKEY_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, MAX_MKEY_CACHE_ENTRIES }; -- cgit v1.2.3 From 5d0f0e57ed900917836385527ce5b122fa1425a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 27 Jan 2023 14:15:29 -0800 Subject: fsverity: support verifying data from large folios Try to make fs/verity/verify.c aware of large folios. This includes making fsverity_verify_bio() support the case where the bio contains large folios, and adding a function fsverity_verify_folio() which is the equivalent of fsverity_verify_page(). There's no way to actually test this with large folios yet, but I've tested that this doesn't cause any regressions. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20230127221529.299560-1-ebiggers@kernel.org --- include/linux/fsverity.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 991a44458996..119a3266791f 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -12,6 +12,7 @@ #define _LINUX_FSVERITY_H #include +#include #include #include #include @@ -169,8 +170,7 @@ int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg); /* verify.c */ -bool fsverity_verify_blocks(struct page *page, unsigned int len, - unsigned int offset); +bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset); void fsverity_verify_bio(struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); @@ -230,8 +230,8 @@ static inline int fsverity_ioctl_read_metadata(struct file *filp, /* verify.c */ -static inline bool fsverity_verify_blocks(struct page *page, unsigned int len, - unsigned int offset) +static inline bool fsverity_verify_blocks(struct folio *folio, size_t len, + size_t offset) { WARN_ON(1); return false; @@ -249,9 +249,14 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) #endif /* !CONFIG_FS_VERITY */ +static inline bool fsverity_verify_folio(struct folio *folio) +{ + return fsverity_verify_blocks(folio, folio_size(folio), 0); +} + static inline bool fsverity_verify_page(struct page *page) { - return fsverity_verify_blocks(page, PAGE_SIZE, 0); + return fsverity_verify_blocks(page_folio(page), PAGE_SIZE, 0); } /** -- cgit v1.2.3 From da27f796a832122ee533c7685438dad1c4e338dd Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 27 Jan 2023 13:46:51 -0500 Subject: ipc,namespace: batch free ipc_namespace structures Instead of waiting for an RCU grace period between each ipc_namespace structure that is being freed, wait an RCU grace period for every batch of ipc_namespace structures. Thanks to Al Viro for the suggestion of the helper function. This speeds up the run time of the test case that allocates ipc_namespaces in a loop from 6 minutes, to a little over 1 second: real 0m1.192s user 0m0.038s sys 0m1.152s Signed-off-by: Rik van Riel Reported-by: Chris Mason Tested-by: Giuseppe Scrivano Suggested-by: Al Viro Signed-off-by: Al Viro --- include/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 62475996fac6..ec55a031aa8c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -88,6 +88,7 @@ extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mnt_make_shortterm(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(const struct path *path); extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); -- cgit v1.2.3 From 622bd6ea90086beb98ac439edd7d57de73d1d6f9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Jan 2023 17:22:59 +0100 Subject: at86rf230: convert to gpio descriptors There are no remaining in-tree users of the platform_data, so this driver can be converted to using the simpler gpiod interfaces. Any out-of-tree users that rely on the platform data can provide the data using the device_property and gpio_lookup interfaces instead. Reviewed-by: Miquel Raynal Reviewed-by: Andy Shevchenko Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230126162323.2986682-1-arnd@kernel.org Signed-off-by: Stefan Schmidt --- include/linux/spi/at86rf230.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/spi/at86rf230.h (limited to 'include/linux') diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h deleted file mode 100644 index d278576ab692..000000000000 --- a/include/linux/spi/at86rf230.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AT86RF230/RF231 driver - * - * Copyright (C) 2009-2012 Siemens AG - * - * Written by: - * Dmitry Eremin-Solenikov - */ -#ifndef AT86RF230_H -#define AT86RF230_H - -struct at86rf230_platform_data { - int rstn; - int slp_tr; - int dig2; - u8 xtal_trim; -}; - -#endif -- cgit v1.2.3 From 569653f022a29a1a44ea9de5308b657228303fa5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Jan 2023 10:40:09 +0000 Subject: nvmem: core: remove nvmem_config wp_gpio No one provides wp_gpio, so let's remove it to avoid issues with the nvmem core putting this gpio. Cc: stable@vger.kernel.org Signed-off-by: Russell King (Oracle) Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230127104015.23839-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 50caa117cb62..bb15c9234e21 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -70,7 +70,6 @@ struct nvmem_keepout { * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. * @priv: User context passed to read/write callbacks. - * @wp-gpio: Write protect pin * @ignore_wp: Write Protect pin is managed by the provider. * * Note: A default "nvmem" name will be assigned to the device if @@ -85,7 +84,6 @@ struct nvmem_config { const char *name; int id; struct module *owner; - struct gpio_desc *wp_gpio; const struct nvmem_cell_info *cells; int ncells; const struct nvmem_keepout *keepout; -- cgit v1.2.3 From 390a07a921b3b1fe544cb2921de7abb94ebb7f26 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sat, 28 Jan 2023 01:06:21 +0100 Subject: bpf: Change BPF_MAX_TRAMP_LINKS to enum This way it's possible to query its value from testcases using BTF. Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230128000650.1516334-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f7f24defccb8..34551e4ebc72 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -939,7 +939,9 @@ struct btf_func_model { /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ -#define BPF_MAX_TRAMP_LINKS 38 +enum { + BPF_MAX_TRAMP_LINKS = 38, +}; struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; -- cgit v1.2.3 From 49f67f393ff264e8d83f6fcec0728a6aa8eed102 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sat, 28 Jan 2023 01:06:44 +0100 Subject: bpf: btf: Add BTF_FMODEL_SIGNED_ARG flag s390x eBPF JIT needs to know whether a function return value is signed and which function arguments are signed, in order to generate code compliant with the s390x ABI. Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230128000650.1516334-26-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++++ include/linux/btf.h | 15 ++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 34551e4ebc72..c411c6bb86c4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -899,8 +899,12 @@ enum bpf_cgroup_storage_type { /* The argument is a structure. */ #define BTF_FMODEL_STRUCT_ARG BIT(0) +/* The argument is signed. */ +#define BTF_FMODEL_SIGNED_ARG BIT(1) + struct btf_func_model { u8 ret_size; + u8 ret_flags; u8 nr_args; u8 arg_size[MAX_BPF_FUNC_ARGS]; u8 arg_flags[MAX_BPF_FUNC_ARGS]; diff --git a/include/linux/btf.h b/include/linux/btf.h index 5f628f323442..e9b90d9c3569 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -236,6 +236,16 @@ static inline bool btf_type_is_small_int(const struct btf_type *t) return btf_type_is_int(t) && t->size <= sizeof(u64); } +static inline u8 btf_int_encoding(const struct btf_type *t) +{ + return BTF_INT_ENCODING(*(u32 *)(t + 1)); +} + +static inline bool btf_type_is_signed_int(const struct btf_type *t) +{ + return btf_type_is_int(t) && (btf_int_encoding(t) & BTF_INT_SIGNED); +} + static inline bool btf_type_is_enum(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; @@ -306,11 +316,6 @@ static inline u8 btf_int_offset(const struct btf_type *t) return BTF_INT_OFFSET(*(u32 *)(t + 1)); } -static inline u8 btf_int_encoding(const struct btf_type *t) -{ - return BTF_INT_ENCODING(*(u32 *)(t + 1)); -} - static inline bool btf_type_is_scalar(const struct btf_type *t) { return btf_type_is_int(t) || btf_type_is_enum(t); -- cgit v1.2.3 From 51e4e3153ebc32d3280d5d17418ae6f1a44f1ec1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 27 Jan 2023 14:25:14 -0800 Subject: fscrypt: support decrypting data from large folios Try to make the filesystem-level decryption functions in fs/crypto/ aware of large folios. This includes making fscrypt_decrypt_bio() support the case where the bio contains large folios, and making fscrypt_decrypt_pagecache_blocks() take a folio instead of a page. There's no way to actually test this with large folios yet, but I've tested that this doesn't cause any regressions. Note that this patch just handles *decryption*, not encryption which will be a little more difficult. Signed-off-by: Eric Biggers Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20230127224202.355629-1-ebiggers@kernel.org --- include/linux/fscrypt.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 4f5f8a651213..433504422d02 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -257,8 +257,8 @@ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); -int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, - unsigned int offs); +int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, + size_t offs); int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num); @@ -422,9 +422,8 @@ static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, return -EOPNOTSUPP; } -static inline int fscrypt_decrypt_pagecache_blocks(struct page *page, - unsigned int len, - unsigned int offs) +static inline int fscrypt_decrypt_pagecache_blocks(struct folio *folio, + size_t len, size_t offs) { return -EOPNOTSUPP; } -- cgit v1.2.3 From dde40322ae20f2d6b0bcb781a9eedcfc7ca3aa73 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 9 Jan 2023 14:46:03 +0000 Subject: io_uring: move submitter_task out of cold cacheline ->submitter_task is used somewhat more frequent now than before, i.e. for local tw enqueue and run, let's move it from the end of ctx, which is full of cold data, to the first cacheline with mostly constants. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/415ca91dc5ad1dec612b892e489cda98e1069542.1673274244.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 128a67a40065..8dfb6c4a35d9 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -195,11 +195,7 @@ struct io_alloc_cache { struct io_ring_ctx { /* const or read-mostly hot data */ struct { - struct percpu_ref refs; - - struct io_rings *rings; unsigned int flags; - enum task_work_notify_mode notify_method; unsigned int compat: 1; unsigned int drain_next: 1; unsigned int restricted: 1; @@ -210,6 +206,11 @@ struct io_ring_ctx { unsigned int syscall_iopoll: 1; /* all CQEs should be posted only by the submitter task */ unsigned int task_complete: 1; + + enum task_work_notify_mode notify_method; + struct io_rings *rings; + struct task_struct *submitter_task; + struct percpu_ref refs; } ____cacheline_aligned_in_smp; /* submission data */ @@ -320,7 +321,6 @@ struct io_ring_ctx { /* Keep this last, we don't need it for the fast path */ struct io_restriction restrictions; - struct task_struct *submitter_task; /* slow path rsrc auxilary data, used by update/register */ struct io_rsrc_node *rsrc_backup_node; -- cgit v1.2.3 From 7b235dd82ad32c1626e51303d94ec5ef4d7bc994 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 9 Jan 2023 14:46:08 +0000 Subject: io_uring: separate wq for ring polling Don't use ->cq_wait for ring polling but add a separate wait queue for it. We need it for following patches. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/dea0be0bf990503443c5c6c337fc66824af7d590.1673274244.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 8dfb6c4a35d9..0d94ee191c15 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -319,7 +319,7 @@ struct io_ring_ctx { } ____cacheline_aligned_in_smp; /* Keep this last, we don't need it for the fast path */ - + struct wait_queue_head poll_wq; struct io_restriction restrictions; /* slow path rsrc auxilary data, used by update/register */ -- cgit v1.2.3 From bca39f39058567643487cd654970717705784ba3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 9 Jan 2023 14:46:09 +0000 Subject: io_uring: add lazy poll_wq activation Even though io_poll_wq_wake()'s waitqueue_active reuses a barrier we do for another waitqueue, it's not going to be the case in the future and so we want to have a fast path for it when the ring has never been polled. Move poll_wq wake ups into __io_commit_cqring_flush() using a new flag called ->poll_activated. The idea behind the flag is to set it when the ring was polled for the first time. This requires additional sync to not miss events, which is done here by using task_work for ->task_complete rings, and by default enabling the flag for all other types of rings. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/060785e8e9137a920b232c0c7f575b131af19cac.1673274244.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 0d94ee191c15..7b5e90520278 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -206,6 +206,7 @@ struct io_ring_ctx { unsigned int syscall_iopoll: 1; /* all CQEs should be posted only by the submitter task */ unsigned int task_complete: 1; + unsigned int poll_activated: 1; enum task_work_notify_mode notify_method; struct io_rings *rings; @@ -357,6 +358,7 @@ struct io_ring_ctx { u32 iowq_limits[2]; bool iowq_limits_set; + struct callback_head poll_wq_task_work; struct list_head defer_list; unsigned sq_thread_idle; /* protected by ->completion_lock */ -- cgit v1.2.3 From d80c0f00d04766972c95e72b7535a842d6f4680d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 9 Jan 2023 14:46:12 +0000 Subject: io_uring: add io_req_local_work_add wake fast path Don't wake the master task after queueing a deferred tw unless it's currently waiting in io_cqring_wait. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/717702d772825a6647e6c315b4690277ba84c3fc.1673274244.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 7b5e90520278..cc0cf0705b8f 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -295,6 +295,7 @@ struct io_ring_ctx { spinlock_t completion_lock; bool poll_multi_queue; + bool cq_waiting; /* * ->iopoll_list is protected by the ctx->uring_lock for -- cgit v1.2.3 From 632ffe0956740ba56ae3f83778f3bf97edd57c69 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 16 Jan 2023 16:49:00 +0000 Subject: io_uring: optimise ctx flags layout There may be different cost for reeading just one byte or more, so it's benificial to keep ctx flag bits that we access together in a single byte. That affected code generation of __io_cq_unlock_post_flush() and removed one memory load. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/bbe8ca4705704690319d65e45845f9fc9d35f420.1673887636.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index cc0cf0705b8f..0efe4d784358 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -196,17 +196,17 @@ struct io_ring_ctx { /* const or read-mostly hot data */ struct { unsigned int flags; - unsigned int compat: 1; unsigned int drain_next: 1; unsigned int restricted: 1; unsigned int off_timeout_used: 1; unsigned int drain_active: 1; - unsigned int drain_disabled: 1; unsigned int has_evfd: 1; - unsigned int syscall_iopoll: 1; /* all CQEs should be posted only by the submitter task */ unsigned int task_complete: 1; + unsigned int syscall_iopoll: 1; unsigned int poll_activated: 1; + unsigned int drain_disabled: 1; + unsigned int compat: 1; enum task_work_notify_mode notify_method; struct io_rings *rings; -- cgit v1.2.3 From 0a26f327e46c203229e72c823dfec71a2b405ec5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 5 Jan 2023 12:51:45 -0800 Subject: block: make BLK_DEF_MAX_SECTORS unsigned This is used as an unsigned value, so define it that way to avoid having to cast it. Suggested-by: Christoph Hellwig Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20230105205146.3610282-2-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 43d4e073b111..2b85161e2256 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1095,11 +1095,12 @@ static inline bool bdev_is_partition(struct block_device *bdev) enum blk_default_limits { BLK_MAX_SEGMENTS = 128, BLK_SAFE_MAX_SECTORS = 255, - BLK_DEF_MAX_SECTORS = 2560, BLK_MAX_SEGMENT_SIZE = 65536, BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; +#define BLK_DEF_MAX_SECTORS 2560u + static inline unsigned long queue_segment_boundary(const struct request_queue *q) { return q->limits.seg_boundary_mask; -- cgit v1.2.3 From c9c77418a98273fe96835c42666f7427b3883f48 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 5 Jan 2023 12:51:46 -0800 Subject: block: save user max_sectors limit The user can set the max_sectors limit to any valid value via sysfs /sys/block//queue/max_sectors_kb attribute. If the device limits are ever rescanned, though, the limit reverts back to the potentially artificially low BLK_DEF_MAX_SECTORS value. Preserve the user's setting as the max_sectors limit as long as it's valid. The user can reset back to defaults by writing 0 to the sysfs file. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20230105205146.3610282-3-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2b85161e2256..b87ed829ab94 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -288,6 +288,7 @@ struct queue_limits { unsigned int max_dev_sectors; unsigned int chunk_sectors; unsigned int max_sectors; + unsigned int max_user_sectors; unsigned int max_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; -- cgit v1.2.3 From 4e2da933b9f19d8098374515ee0984a20202e674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Fri, 13 Jan 2023 13:35:05 +0100 Subject: drbd: drop API_VERSION define MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the genetlink api version as defined in drbd_genl_api.h. Signed-off-by: Christoph Böhmwalder Reviewed-by: Joel Colledge Link: https://lore.kernel.org/r/20230113123506.144082-3-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- include/linux/drbd.h | 1 - include/linux/drbd_genl_api.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5755537b51b1..df65a8f5228a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -40,7 +40,6 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.4.11" -#define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h index bd62efc29002..70682c058027 100644 --- a/include/linux/drbd_genl_api.h +++ b/include/linux/drbd_genl_api.h @@ -47,7 +47,7 @@ enum drbd_state_info_bcast_reason { #undef linux #include -#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_VERSION 1 #define GENL_MAGIC_FAMILY drbd #define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) #define GENL_MAGIC_INCLUDE_FILE -- cgit v1.2.3 From 20f2a34a421b1716b96d1e34d4f4948bf4b4ba1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Fri, 13 Jan 2023 13:35:06 +0100 Subject: drbd: split off drbd_config into separate file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be more similar to what we do in the out-of-tree module and ease the upstreaming process. Signed-off-by: Christoph Böhmwalder Reviewed-by: Joel Colledge Link: https://lore.kernel.org/r/20230113123506.144082-4-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- include/linux/drbd.h | 6 ------ include/linux/drbd_config.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 include/linux/drbd_config.h (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index df65a8f5228a..5468a2399d48 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -38,12 +38,6 @@ #endif -extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.11" -#define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 101 - - enum drbd_io_error_p { EP_PASS_ON, /* FIXME should the better be named "Ignore"? */ EP_CALL_HELPER, diff --git a/include/linux/drbd_config.h b/include/linux/drbd_config.h new file mode 100644 index 000000000000..d215365c6bb1 --- /dev/null +++ b/include/linux/drbd_config.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * drbd_config.h + * DRBD's compile time configuration. + */ + +#ifndef DRBD_CONFIG_H +#define DRBD_CONFIG_H + +extern const char *drbd_buildtag(void); + +#define REL_VERSION "8.4.11" +#define PRO_VERSION_MIN 86 +#define PRO_VERSION_MAX 101 + +#endif -- cgit v1.2.3 From 37800068673220326fcdf7f40ccf05ca54854585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Fri, 13 Jan 2023 13:35:31 +0100 Subject: drbd: adjust drbd_limits license header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See also commit 93c68cc46a07 ("drbd: use consistent license"). We only want to license drbd under GPL-2.0, so use the corresponding SPDX header consistently. Signed-off-by: Christoph Böhmwalder Reviewed-by: Joel Colledge Link: https://lore.kernel.org/r/20230113123538.144276-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 9e33f7038bea..d64271ccece4 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* drbd_limits.h This file is part of DRBD by Philipp Reisner and Lars Ellenberg. -- cgit v1.2.3 From 2167879655b3a9a0a970d50b202e45f7fc45d092 Mon Sep 17 00:00:00 2001 From: Robert Altnoeder Date: Fri, 13 Jan 2023 13:35:32 +0100 Subject: drbd: fix DRBD_VOLUME_MAX 65535 -> 65534 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The protocol uses -1 as a reserved value for 'no specific volume', and since the protocol field is a 16 bit unsigned value, -1 is converted to 65535. Therefore, limit the range of valid volume numbers to [0, 65534]. Signed-off-by: Robert Altnoeder Signed-off-by: Christoph Böhmwalder Reviewed-by: Joel Colledge Link: https://lore.kernel.org/r/20230113123538.144276-3-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index d64271ccece4..058f7600f79c 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -21,7 +21,7 @@ #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_MINOR_COUNT_SCALE '1' -#define DRBD_VOLUME_MAX 65535 +#define DRBD_VOLUME_MAX 65534 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 -- cgit v1.2.3 From c10bdcf9838e556f54eee63253f629028e01dee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Fri, 13 Jan 2023 13:35:33 +0100 Subject: drbd: make limits unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are almost always used as unsigned integers, so mark them as such. Signed-off-by: Christoph Böhmwalder Reviewed-by: Joel Colledge Link: https://lore.kernel.org/r/20230113123538.144276-4-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- include/linux/drbd_limits.h | 202 ++++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 101 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 058f7600f79c..5b042fb427e9 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,123 +16,123 @@ #define DEBUG_RANGE_CHECK 0 -#define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX 255 -#define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_MIN 1U +#define DRBD_MINOR_COUNT_MAX 255U +#define DRBD_MINOR_COUNT_DEF 32U #define DRBD_MINOR_COUNT_SCALE '1' -#define DRBD_VOLUME_MAX 65534 +#define DRBD_VOLUME_MAX 65534U -#define DRBD_DIALOG_REFRESH_MIN 0 -#define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_MIN 0U +#define DRBD_DIALOG_REFRESH_MAX 600U #define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ -#define DRBD_PORT_MIN 1 -#define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_MIN 1U +#define DRBD_PORT_MAX 0xffffU #define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ -#define DRBD_WFC_TIMEOUT_MIN 0 -#define DRBD_WFC_TIMEOUT_MAX 300000 -#define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_MIN 0U +#define DRBD_WFC_TIMEOUT_MAX 300000U +#define DRBD_WFC_TIMEOUT_DEF 0U #define DRBD_WFC_TIMEOUT_SCALE '1' -#define DRBD_DEGR_WFC_TIMEOUT_MIN 0 -#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 -#define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_MIN 0U +#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000U +#define DRBD_DEGR_WFC_TIMEOUT_DEF 0U #define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' -#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 -#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 -#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0U +#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000U +#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0U #define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ /* timeout, unit centi seconds * more than one minute timeout is not useful */ -#define DRBD_TIMEOUT_MIN 1 -#define DRBD_TIMEOUT_MAX 600 -#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_MIN 1U +#define DRBD_TIMEOUT_MAX 600U +#define DRBD_TIMEOUT_DEF 60U /* 6 seconds */ #define DRBD_TIMEOUT_SCALE '1' /* If backing disk takes longer than disk_timeout, mark the disk as failed */ -#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ -#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ -#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_MIN 0U /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000U /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0U /* disabled */ #define DRBD_DISK_TIMEOUT_SCALE '1' /* active connection retries when C_WF_CONNECTION */ -#define DRBD_CONNECT_INT_MIN 1 -#define DRBD_CONNECT_INT_MAX 120 -#define DRBD_CONNECT_INT_DEF 10 /* seconds */ +#define DRBD_CONNECT_INT_MIN 1U +#define DRBD_CONNECT_INT_MAX 120U +#define DRBD_CONNECT_INT_DEF 10U /* seconds */ #define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ -#define DRBD_PING_INT_MIN 1 -#define DRBD_PING_INT_MAX 120 -#define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_MIN 1U +#define DRBD_PING_INT_MAX 120U +#define DRBD_PING_INT_DEF 10U #define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ -#define DRBD_PING_TIMEO_MIN 1 -#define DRBD_PING_TIMEO_MAX 300 -#define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_MIN 1U +#define DRBD_PING_TIMEO_MAX 300U +#define DRBD_PING_TIMEO_DEF 5U #define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ -#define DRBD_MAX_EPOCH_SIZE_MIN 1 -#define DRBD_MAX_EPOCH_SIZE_MAX 20000 -#define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_MIN 1U +#define DRBD_MAX_EPOCH_SIZE_MAX 20000U +#define DRBD_MAX_EPOCH_SIZE_DEF 2048U #define DRBD_MAX_EPOCH_SIZE_SCALE '1' /* I don't think that a tcp send buffer of more than 10M is useful */ -#define DRBD_SNDBUF_SIZE_MIN 0 -#define DRBD_SNDBUF_SIZE_MAX (10<<20) -#define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_MIN 0U +#define DRBD_SNDBUF_SIZE_MAX (10U<<20) +#define DRBD_SNDBUF_SIZE_DEF 0U #define DRBD_SNDBUF_SIZE_SCALE '1' -#define DRBD_RCVBUF_SIZE_MIN 0 -#define DRBD_RCVBUF_SIZE_MAX (10<<20) -#define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_MIN 0U +#define DRBD_RCVBUF_SIZE_MAX (10U<<20) +#define DRBD_RCVBUF_SIZE_DEF 0U #define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ -#define DRBD_MAX_BUFFERS_MIN 32 -#define DRBD_MAX_BUFFERS_MAX 131072 -#define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_MIN 32U +#define DRBD_MAX_BUFFERS_MAX 131072U +#define DRBD_MAX_BUFFERS_DEF 2048U #define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ -#define DRBD_UNPLUG_WATERMARK_MIN 1 -#define DRBD_UNPLUG_WATERMARK_MAX 131072 +#define DRBD_UNPLUG_WATERMARK_MIN 1U +#define DRBD_UNPLUG_WATERMARK_MAX 131072U #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) #define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. * 200 should be more than enough even for very short timeouts */ -#define DRBD_KO_COUNT_MIN 0 -#define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_MIN 0U +#define DRBD_KO_COUNT_MAX 200U +#define DRBD_KO_COUNT_DEF 7U #define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RESYNC_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1U /* channel bonding 10 GbE, or other hardware */ #define DRBD_RESYNC_RATE_MAX (4 << 20) -#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_DEF 250U #define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_AL_EXTENTS_MIN 67 +#define DRBD_AL_EXTENTS_MIN 67U /* we use u16 as "slot number", (u16)~0 is "FREE". * If you use >= 292 kB on-disk ring buffer, * this is the maximum you can use: */ -#define DRBD_AL_EXTENTS_MAX 0xfffe -#define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_MAX 0xfffeU +#define DRBD_AL_EXTENTS_DEF 1237U #define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 @@ -147,9 +147,9 @@ * the upper limit with 64bit kernel, enough ram and flexible meta data * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_MIN 0 -#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) -#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_MIN 0LLU +#define DRBD_DISK_SIZE_MAX (1LLU * (2LLU << 40)) +#define DRBD_DISK_SIZE_DEF 0LLU /* = disabled = no user size... */ #define DRBD_DISK_SIZE_SCALE 's' /* sectors */ #define DRBD_ON_IO_ERROR_DEF EP_DETACH @@ -162,39 +162,39 @@ #define DRBD_ON_CONGESTION_DEF OC_BLOCK #define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL -#define DRBD_MAX_BIO_BVECS_MIN 0 -#define DRBD_MAX_BIO_BVECS_MAX 128 -#define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_MIN 0U +#define DRBD_MAX_BIO_BVECS_MAX 128U +#define DRBD_MAX_BIO_BVECS_DEF 0U #define DRBD_MAX_BIO_BVECS_SCALE '1' -#define DRBD_C_PLAN_AHEAD_MIN 0 -#define DRBD_C_PLAN_AHEAD_MAX 300 -#define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_MIN 0U +#define DRBD_C_PLAN_AHEAD_MAX 300U +#define DRBD_C_PLAN_AHEAD_DEF 20U #define DRBD_C_PLAN_AHEAD_SCALE '1' -#define DRBD_C_DELAY_TARGET_MIN 1 -#define DRBD_C_DELAY_TARGET_MAX 100 -#define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_MIN 1U +#define DRBD_C_DELAY_TARGET_MAX 100U +#define DRBD_C_DELAY_TARGET_DEF 10U #define DRBD_C_DELAY_TARGET_SCALE '1' -#define DRBD_C_FILL_TARGET_MIN 0 -#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ -#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_MIN 0U +#define DRBD_C_FILL_TARGET_MAX (1U<<20) /* 500MByte in sec */ +#define DRBD_C_FILL_TARGET_DEF 100U /* Try to place 50KiB in socket send buffer during resync */ #define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ -#define DRBD_C_MAX_RATE_MIN 250 -#define DRBD_C_MAX_RATE_MAX (4 << 20) -#define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_MIN 250U +#define DRBD_C_MAX_RATE_MAX (4U << 20) +#define DRBD_C_MAX_RATE_DEF 102400U #define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_C_MIN_RATE_MIN 0 -#define DRBD_C_MIN_RATE_MAX (4 << 20) -#define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_MIN 0U +#define DRBD_C_MIN_RATE_MAX (4U << 20) +#define DRBD_C_MIN_RATE_DEF 250U #define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_CONG_FILL_MIN 0 -#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ -#define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_MIN 0U +#define DRBD_CONG_FILL_MAX (10U<<21) /* 10GByte in sectors */ +#define DRBD_CONG_FILL_DEF 0U #define DRBD_CONG_FILL_SCALE 's' /* sectors */ #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN @@ -204,48 +204,48 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C -#define DRBD_DISK_BARRIER_DEF 0 -#define DRBD_DISK_FLUSHES_DEF 1 -#define DRBD_DISK_DRAIN_DEF 1 -#define DRBD_MD_FLUSHES_DEF 1 -#define DRBD_TCP_CORK_DEF 1 -#define DRBD_AL_UPDATES_DEF 1 +#define DRBD_DISK_BARRIER_DEF 0U +#define DRBD_DISK_FLUSHES_DEF 1U +#define DRBD_DISK_DRAIN_DEF 1U +#define DRBD_MD_FLUSHES_DEF 1U +#define DRBD_TCP_CORK_DEF 1U +#define DRBD_AL_UPDATES_DEF 1U /* We used to ignore the discard_zeroes_data setting. * To not change established (and expected) behaviour, * by default assume that, for discard_zeroes_data=0, * we can make that an effective discard_zeroes_data=1, * if we only explicitly zero-out unaligned partial chunks. */ -#define DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF 1 +#define DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF 1U /* Some backends pretend to support WRITE SAME, * but fail such requests when they are actually submitted. * This is to tell DRBD to not even try. */ -#define DRBD_DISABLE_WRITE_SAME_DEF 0 +#define DRBD_DISABLE_WRITE_SAME_DEF 0U -#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 -#define DRBD_ALWAYS_ASBP_DEF 0 -#define DRBD_USE_RLE_DEF 1 -#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0 +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0U +#define DRBD_ALWAYS_ASBP_DEF 0U +#define DRBD_USE_RLE_DEF 1U +#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0U -#define DRBD_AL_STRIPES_MIN 1 -#define DRBD_AL_STRIPES_MAX 1024 -#define DRBD_AL_STRIPES_DEF 1 +#define DRBD_AL_STRIPES_MIN 1U +#define DRBD_AL_STRIPES_MAX 1024U +#define DRBD_AL_STRIPES_DEF 1U #define DRBD_AL_STRIPES_SCALE '1' -#define DRBD_AL_STRIPE_SIZE_MIN 4 -#define DRBD_AL_STRIPE_SIZE_MAX 16777216 -#define DRBD_AL_STRIPE_SIZE_DEF 32 +#define DRBD_AL_STRIPE_SIZE_MIN 4U +#define DRBD_AL_STRIPE_SIZE_MAX 16777216U +#define DRBD_AL_STRIPE_SIZE_DEF 32U #define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ -#define DRBD_SOCKET_CHECK_TIMEO_MIN 0 +#define DRBD_SOCKET_CHECK_TIMEO_MIN 0U #define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX -#define DRBD_SOCKET_CHECK_TIMEO_DEF 0 +#define DRBD_SOCKET_CHECK_TIMEO_DEF 0U #define DRBD_SOCKET_CHECK_TIMEO_SCALE '1' -#define DRBD_RS_DISCARD_GRANULARITY_MIN 0 -#define DRBD_RS_DISCARD_GRANULARITY_MAX (1<<20) /* 1MiByte */ -#define DRBD_RS_DISCARD_GRANULARITY_DEF 0 /* disabled by default */ +#define DRBD_RS_DISCARD_GRANULARITY_MIN 0U +#define DRBD_RS_DISCARD_GRANULARITY_MAX (1U<<20) /* 1MiByte */ +#define DRBD_RS_DISCARD_GRANULARITY_DEF 0U /* disabled by default */ #define DRBD_RS_DISCARD_GRANULARITY_SCALE '1' /* bytes */ #endif -- cgit v1.2.3 From fea127b36c93d9afe49561b640fe1fc541dc3ba4 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 10 Jan 2023 15:36:33 +0100 Subject: block: remove superfluous check for request queue in bdev_is_zoned() Remove the superfluous request queue check in bdev_is_zoned() as bdev_get_queue() can never return NULL. Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20230110143635.77300-2-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b87ed829ab94..0956bc0fb5b0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1285,12 +1285,7 @@ static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) static inline bool bdev_is_zoned(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_is_zoned(q); - - return false; + return blk_queue_is_zoned(bdev_get_queue(bdev)); } static inline bool bdev_op_is_zoned_write(struct block_device *bdev, -- cgit v1.2.3 From e29b210021dcf8e03e0dcc035107afaeb55e6631 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 10 Jan 2023 15:36:34 +0100 Subject: block: add a new helper bdev_{is_zone_start, offset_from_zone_start} Instead of open coding to check for zone start, add a helper to improve readability and store the logic in one place. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Pankaj Raghav Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20230110143635.77300-3-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0956bc0fb5b0..7822c6f4c7bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1306,6 +1306,18 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) return q->limits.chunk_sectors; } +static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev, + sector_t sector) +{ + return sector & (bdev_zone_sectors(bdev) - 1); +} + +static inline bool bdev_is_zone_start(struct block_device *bdev, + sector_t sector) +{ + return bdev_offset_from_zone_start(bdev, sector) == 0; +} + static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->limits.dma_alignment : 511; -- cgit v1.2.3 From d67ea690ce0983dadf59cd06facc18f3acc89cea Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 10 Jan 2023 15:36:35 +0100 Subject: block: introduce bdev_zone_no helper Add a generic bdev_zone_no() helper to calculate zone number for a given sector in a block device. This helper internally uses disk_zone_no() to find the zone number. Use the helper bdev_zone_no() to calculate nr of zones. This lets us make modifications to the math if needed in one place. Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Signed-off-by: Pankaj Raghav Link: https://lore.kernel.org/r/20230110143635.77300-4-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7822c6f4c7bd..89f51d68c68a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1288,6 +1288,11 @@ static inline bool bdev_is_zoned(struct block_device *bdev) return blk_queue_is_zoned(bdev_get_queue(bdev)); } +static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec) +{ + return disk_zone_no(bdev->bd_disk, sec); +} + static inline bool bdev_op_is_zoned_write(struct block_device *bdev, blk_opf_t op) { -- cgit v1.2.3 From f1c006f1c6850c14040f8337753a63119bba39b9 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 19 Jan 2023 19:03:50 +0800 Subject: blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy() Currently parent pd can be freed before child pd: t1: remove cgroup C1 blkcg_destroy_blkgs blkg_destroy list_del_init(&blkg->q_node) // remove blkg from queue list percpu_ref_kill(&blkg->refcnt) blkg_release call_rcu t2: from t1 __blkg_release blkg_free schedule_work t4: deactivate policy blkcg_deactivate_policy pd_free_fn // parent of C1 is freed first t3: from t2 blkg_free_workfn pd_free_fn If policy(for example, ioc_timer_fn() from iocost) access parent pd from child pd after pd_offline_fn(), then UAF can be triggered. Fix the problem by delaying 'list_del_init(&blkg->q_node)' from blkg_destroy() to blkg_free_workfn(), and using a new disk level mutex to synchronize blkg_free_workfn() and blkcg_deactivate_policy(). Signed-off-by: Yu Kuai Acked-by: Tejun Heo Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230119110350.2287325-4-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 89f51d68c68a..b9637d63e6f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -485,6 +485,7 @@ struct request_queue { DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); struct blkcg_gq *root_blkg; struct list_head blkg_list; + struct mutex blkcg_mutex; #endif struct queue_limits limits; -- cgit v1.2.3 From 528eb2cb87bc1353235a6384696b4849bde8b0ba Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sun, 29 Jan 2023 20:04:57 +0100 Subject: s390/bpf: Implement arch_prepare_bpf_trampoline() arch_prepare_bpf_trampoline() is used for direct attachment of eBPF programs to various places, bypassing kprobes. It's responsible for calling a number of eBPF programs before, instead and/or after whatever they are attached to. Add a s390x implementation, paying attention to the following: - Reuse the existing JIT infrastructure, where possible. - Like the existing JIT, prefer making multiple passes instead of backpatching. Currently 2 passes is enough. If literal pool is introduced, this needs to be raised to 3. However, at the moment adding literal pool only makes the code larger. If branch shortening is introduced, the number of passes needs to be increased even further. - Support both regular and ftrace calling conventions, depending on the trampoline flags. - Use expolines for indirect calls. - Handle the mismatch between the eBPF and the s390x ABIs. - Sign-extend fmod_ret return values. invoke_bpf_prog() produces about 120 bytes; it might be possible to slightly optimize this, but reaching 50 bytes, like on x86_64, looks unrealistic: just loading cookie, __bpf_prog_enter, bpf_func, insnsi and __bpf_prog_exit as literals already takes at least 5 * 12 = 60 bytes, and we can't use relative addressing for most of them. Therefore, lower BPF_MAX_TRAMP_LINKS on s390x. Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230129190501.1624747-5-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c411c6bb86c4..e11db75094d0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -944,7 +944,11 @@ struct btf_func_model { * bytes on x86. */ enum { +#if defined(__s390x__) + BPF_MAX_TRAMP_LINKS = 27, +#else BPF_MAX_TRAMP_LINKS = 38, +#endif }; struct bpf_tramp_links { -- cgit v1.2.3 From 54aa39a513dbf2164ca462a19f04519b2407a224 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 27 Jan 2023 00:35:39 +0300 Subject: net: stmmac: do not stop RX_CLK in Rx LPI state for qcs404 SoC Currently in phy_init_eee() the driver unconditionally configures the PHY to stop RX_CLK after entering Rx LPI state. This causes an LPI interrupt storm on my qcs404-base board. Change the PHY initialization so that for "qcom,qcs404-ethqos" compatible device RX_CLK continues to run even in Rx LPI state. Signed-off-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 83ca2e8eb6b5..a152678b82b7 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -252,6 +252,7 @@ struct plat_stmmacenet_data { int rss_en; int mac_port_sel_speed; bool en_tx_lpi_clockgating; + bool rx_clk_runs_in_lpi; int has_xgmac; bool vlan_fail_q_en; u8 vlan_fail_q; -- cgit v1.2.3 From d35d0c9de762e003129dfc1240df7152a4bc31e8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 15:12:46 +0200 Subject: leds: Add missing includes and forward declarations in leds.h Add missing includes and forward declarations to leds.h. While at it, replace headers by forward declarations and vise versa. Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230103131256.33894-2-andriy.shevchenko@linux.intel.com --- include/linux/leds.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 31cb74b90ffc..4df46a85e5c2 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -10,17 +10,21 @@ #include #include -#include -#include #include #include #include #include +#include #include -struct device; -struct led_pattern; +struct attribute_group; struct device_node; +struct fwnode_handle; +struct gpio_desc; +struct kernfs_node; +struct led_pattern; +struct platform_device; + /* * LED Core */ @@ -529,7 +533,6 @@ struct led_properties { const char *label; }; -struct gpio_desc; typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state, unsigned long *delay_on, unsigned long *delay_off); -- cgit v1.2.3 From 156a5bb89ca6f3edd2be0bfd0de15e575442927e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 15:12:47 +0200 Subject: leds: Move led_init_default_state_get() to the global header There are users inside and outside LED framework that have implemented a local copy of led_init_default_state_get(). In order to deduplicate that, as the first step move the declaration from LED header to the global one. Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230103131256.33894-3-andriy.shevchenko@linux.intel.com --- include/linux/leds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4df46a85e5c2..d71201a968b6 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -82,6 +82,8 @@ struct led_init_data { bool devname_mandatory; }; +enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode); + struct led_hw_trigger_type { int dummy; }; -- cgit v1.2.3 From acf63c458b55ecfb2015b33dd6ba3cc8fbc1c5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:45 +0200 Subject: fpga: m10bmc-sec: Add support for N6000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for PMCI-based flash access path and N6000 sec update support. Access to flash staging area is different for N6000 from that of the SPI interfaced counterparts. Introduce intel_m10bmc_flash_bulk_ops to allow interface specific differentiations for the flash access path for sec update and make m10bmc_sec_read/write() in sec update driver to use the new operations. The .flash_mutex serializes read/read. Flash update (erase+write) must use ->lock/unlock_write() to prevent reads during update (reads would timeout on setting flash MUX as BMC will prevent it). Create a type specific RSU status reg handler for N6000 because the field has moved from doorbell to auth result register. If a failure is detected while altering the flash MUX, it seems safer to try to set it back and doesn't seem harmful. Likely there are enough troubles in that case anyway so setting it back fails too (which is harmless sans the small extra delay) or just confirms that the value wasn't changed. Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Co-developed-by: Russ Weight Signed-off-by: Russ Weight Acked-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-12-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 51 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 810534b1bd12..1812ebfa11a8 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -127,6 +127,7 @@ #define M10BMC_N6000_DOORBELL 0x1c0 #define M10BMC_N6000_AUTH_RESULT 0x1c4 +#define AUTH_RESULT_RSU_STATUS GENMASK(23, 16) #define M10BMC_N6000_BUILD_VER 0x0 #define NIOS2_N6000_FW_VERSION 0x4 @@ -148,6 +149,35 @@ #define M10BMC_N6000_STAGING_FLASH_COUNT 0x7ff5000 +#define M10BMC_N6000_FLASH_MUX_CTRL 0x1d0 +#define M10BMC_N6000_FLASH_MUX_SELECTION GENMASK(2, 0) +#define M10BMC_N6000_FLASH_MUX_IDLE 0 +#define M10BMC_N6000_FLASH_MUX_NIOS 1 +#define M10BMC_N6000_FLASH_MUX_HOST 2 +#define M10BMC_N6000_FLASH_MUX_PFL 4 +#define get_flash_mux(mux) FIELD_GET(M10BMC_N6000_FLASH_MUX_SELECTION, mux) + +#define M10BMC_N6000_FLASH_NIOS_REQUEST BIT(4) +#define M10BMC_N6000_FLASH_HOST_REQUEST BIT(5) + +#define M10BMC_N6000_FLASH_CTRL 0x40 +#define M10BMC_N6000_FLASH_WR_MODE BIT(0) +#define M10BMC_N6000_FLASH_RD_MODE BIT(1) +#define M10BMC_N6000_FLASH_BUSY BIT(2) +#define M10BMC_N6000_FLASH_FIFO_SPACE GENMASK(13, 4) +#define M10BMC_N6000_FLASH_READ_COUNT GENMASK(25, 16) + +#define M10BMC_N6000_FLASH_ADDR 0x44 +#define M10BMC_N6000_FLASH_FIFO 0x800 +#define M10BMC_N6000_READ_BLOCK_SIZE 0x800 +#define M10BMC_N6000_FIFO_MAX_BYTES 0x800 +#define M10BMC_N6000_FIFO_WORD_SIZE 4 +#define M10BMC_N6000_FIFO_MAX_WORDS (M10BMC_N6000_FIFO_MAX_BYTES / \ + M10BMC_N6000_FIFO_WORD_SIZE) + +#define M10BMC_FLASH_INT_US 1 +#define M10BMC_FLASH_TIMEOUT_US 10000 + /** * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map */ @@ -183,16 +213,37 @@ struct intel_m10bmc_platform_info { const struct m10bmc_csr_map *csr_map; }; +struct intel_m10bmc; + +/** + * struct intel_m10bmc_flash_bulk_ops - device specific operations for flash R/W + * @read: read a block of data from flash + * @write: write a block of data to flash + * @lock_write: locks flash access for erase+write + * @unlock_write: unlock flash access + * + * Write must be protected with @lock_write and @unlock_write. While the flash + * is locked, @read returns -EBUSY. + */ +struct intel_m10bmc_flash_bulk_ops { + int (*read)(struct intel_m10bmc *m10bmc, u8 *buf, u32 addr, u32 size); + int (*write)(struct intel_m10bmc *m10bmc, const u8 *buf, u32 offset, u32 size); + int (*lock_write)(struct intel_m10bmc *m10bmc); + void (*unlock_write)(struct intel_m10bmc *m10bmc); +}; + /** * struct intel_m10bmc - Intel MAX 10 BMC parent driver data structure * @dev: this device * @regmap: the regmap used to access registers by m10bmc itself * @info: the platform information for MAX10 BMC + * @flash_bulk_ops: optional device specific operations for flash R/W */ struct intel_m10bmc { struct device *dev; struct regmap *regmap; const struct intel_m10bmc_platform_info *info; + const struct intel_m10bmc_flash_bulk_ops *flash_bulk_ops; }; /* -- cgit v1.2.3 From 79729f26b074a5d2722c27fa76cc45ef721e65cd Mon Sep 17 00:00:00 2001 From: Evgeniy Baskov Date: Tue, 22 Nov 2022 14:12:31 +0300 Subject: efi/libstub: Add memory attribute protocol definitions EFI_MEMORY_ATTRIBUTE_PROTOCOL servers as a better alternative to DXE services for setting memory attributes in EFI Boot Services environment. This protocol is better since it is a part of UEFI specification itself and not UEFI PI specification like DXE services. Add EFI_MEMORY_ATTRIBUTE_PROTOCOL definitions. Support mixed mode properly for its calls. Tested-by: Mario Limonciello Signed-off-by: Evgeniy Baskov Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1a1adc8d3ba3..507390dda8b9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -391,6 +391,7 @@ void efi_native_runtime_setup(void); #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) +#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) -- cgit v1.2.3 From 45e888ef99d9f378c2cbadc3caa4eee1aaf09eac Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 19 Dec 2022 11:20:12 -0800 Subject: gpiolib: of: remove of_gpio_count() There are no more users of of_gpio_count() in the mainline kernel, remove it. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 6db627257a7b..39f16a960565 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -105,17 +105,6 @@ static inline int of_gpio_named_count(const struct device_node *np, return of_count_phandle_with_args(np, propname, "#gpio-cells"); } -/** - * of_gpio_count() - Count GPIOs for a device - * @np: device node to count GPIOs for - * - * Same as of_gpio_named_count, but hard coded to use the 'gpios' property - */ -static inline int of_gpio_count(const struct device_node *np) -{ - return of_gpio_named_count(np, "gpios"); -} - static inline int of_get_gpio_flags(const struct device_node *np, int index, enum of_gpio_flags *flags) { -- cgit v1.2.3 From c7835652a85df183c967f574d1999505ebf80b88 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 19 Dec 2022 11:20:13 -0800 Subject: gpiolib: of: stop exporting of_gpio_named_count() The only user of this function is gpiolib-of.c so move it there. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 39f16a960565..680025c1a55b 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -79,32 +79,6 @@ static inline int of_get_named_gpio_flags(const struct device_node *np, #endif /* CONFIG_OF_GPIO */ -/** - * of_gpio_named_count() - Count GPIOs for a device - * @np: device node to count GPIOs for - * @propname: property name containing gpio specifier(s) - * - * The function returns the count of GPIOs specified for a node. - * Note that the empty GPIO specifiers count too. Returns either - * Number of gpios defined in property, - * -EINVAL for an incorrectly formed gpios property, or - * -ENOENT for a missing gpios property - * - * Example: - * gpios = <0 - * &gpio1 1 2 - * 0 - * &gpio2 3 4>; - * - * The above example defines four GPIOs, two of which are not specified. - * This function will return '4' - */ -static inline int of_gpio_named_count(const struct device_node *np, - const char *propname) -{ - return of_count_phandle_with_args(np, propname, "#gpio-cells"); -} - static inline int of_get_gpio_flags(const struct device_node *np, int index, enum of_gpio_flags *flags) { -- cgit v1.2.3 From 40fc56ee608cdb20022c225ac6f1e4b7ba63f8f1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 19 Dec 2022 11:20:15 -0800 Subject: gpiolib: of: remove of_get_gpio[_flags]() and of_get_named_gpio_flags() There are no more users of these APIs in the mainline kernel, remove them. This leaves of_get_named_gpio() as the only legacy OF-specific API. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 45 ++++----------------------------------------- 1 file changed, 4 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 680025c1a55b..e27a9187c0c6 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -50,8 +50,8 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc) return container_of(gc, struct of_mm_gpio_chip, gc); } -extern int of_get_named_gpio_flags(const struct device_node *np, - const char *list_name, int index, enum of_gpio_flags *flags); +extern int of_get_named_gpio(const struct device_node *np, + const char *list_name, int index); extern int of_mm_gpiochip_add_data(struct device_node *np, struct of_mm_gpio_chip *mm_gc, @@ -68,49 +68,12 @@ extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc); #include /* Drivers may not strictly depend on the GPIO support, so let them link. */ -static inline int of_get_named_gpio_flags(const struct device_node *np, - const char *list_name, int index, enum of_gpio_flags *flags) -{ - if (flags) - *flags = 0; - - return -ENOSYS; -} - -#endif /* CONFIG_OF_GPIO */ - -static inline int of_get_gpio_flags(const struct device_node *np, int index, - enum of_gpio_flags *flags) -{ - return of_get_named_gpio_flags(np, "gpios", index, flags); -} - -/** - * of_get_named_gpio() - Get a GPIO number to use with GPIO API - * @np: device node to get GPIO from - * @propname: Name of property containing gpio specifier(s) - * @index: index of the GPIO - * - * Returns GPIO number to use with Linux generic GPIO API, or one of the errno - * value on the error condition. - */ static inline int of_get_named_gpio(const struct device_node *np, const char *propname, int index) { - return of_get_named_gpio_flags(np, propname, index, NULL); + return -ENOSYS; } -/** - * of_get_gpio() - Get a GPIO number to use with GPIO API - * @np: device node to get GPIO from - * @index: index of the GPIO - * - * Returns GPIO number to use with Linux generic GPIO API, or one of the errno - * value on the error condition. - */ -static inline int of_get_gpio(const struct device_node *np, int index) -{ - return of_get_gpio_flags(np, index, NULL); -} +#endif /* CONFIG_OF_GPIO */ #endif /* __LINUX_OF_GPIO_H */ -- cgit v1.2.3 From 650f2dc970539b3344a98c4bd18efa309e66623b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 19 Dec 2022 11:20:16 -0800 Subject: gpiolib: of: remove [devm_]gpiod_get_from_of_node() APIs Now that everyone is using [devm_]fwnode_gpiod_get[_index]() APIs, remove OF-specific [devm_]gpiod_get_from_of_node(). Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 48 ------------------------------------------- 1 file changed, 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45da8f137fe5..59cb20cfac3d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -581,54 +581,6 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, flags, label); } -#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO) -struct device_node; - -struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label); - -#else /* CONFIG_GPIOLIB && CONFIG_OF_GPIO */ - -struct device_node; - -static inline -struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label) -{ - return ERR_PTR(-ENOSYS); -} - -#endif /* CONFIG_GPIOLIB && CONFIG_OF_GPIO */ - -#ifdef CONFIG_GPIOLIB -struct device_node; - -struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev, - const struct device_node *node, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label); - -#else /* CONFIG_GPIOLIB */ - -struct device_node; - -static inline -struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev, - const struct device_node *node, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label) -{ - return ERR_PTR(-ENOSYS); -} - -#endif /* CONFIG_GPIOLIB */ - struct acpi_gpio_params { unsigned int crs_entry_index; unsigned int line_index; -- cgit v1.2.3 From f2527d8f566a45fa00ee5abd04d1c9476d4d704f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2023 23:08:50 +0200 Subject: gpio: Remove unused and obsoleted gpio_export_link() gpio_export_link() is legacy and unused API, remove it for good. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Yanteng Si Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 346f60bbab30..e94815b3ce1d 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -197,14 +197,6 @@ static inline int gpio_export(unsigned gpio, bool direction_may_change) return -EINVAL; } -static inline int gpio_export_link(struct device *dev, const char *name, - unsigned gpio) -{ - /* GPIO can never have been exported */ - WARN_ON(1); - return -EINVAL; -} - static inline void gpio_unexport(unsigned gpio) { /* GPIO can never have been exported */ -- cgit v1.2.3 From 70d0fc4288dabd65025fde7774b4f9262afa9034 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Dec 2022 11:20:44 +0200 Subject: gpiolib: Get rid of not used of_node member All new drivers should use fwnode and / or parent to provide the necessary information to the GPIO library. Cc: Thierry Reding Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 44783fc16125..2316cb7fa667 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -503,13 +503,6 @@ struct gpio_chip { * the device tree automatically may have an OF translation */ - /** - * @of_node: - * - * Pointer to a device tree node representing this GPIO controller. - */ - struct device_node *of_node; - /** * @of_gpio_n_cells: * -- cgit v1.2.3 From 9a7dcaefdb8ab1c2f8558e3a66261165c86c5059 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 16:17:43 +0200 Subject: gpiolib: Do not mention legacy API in the code Replace mentioning of legacy API by the latest one. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 2316cb7fa667..26a808fb8a25 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -336,7 +336,7 @@ struct gpio_irq_chip { * @set_multiple: assigns output values for multiple signals defined by "mask" * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. - * @to_irq: optional hook supporting non-static gpio_to_irq() mappings; + * @to_irq: optional hook supporting non-static gpiod_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code * will be used when this is omitted, but custom code can show extra -- cgit v1.2.3 From 149a028a5134ab740079bc88ee58c24a7d072ab9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 16:45:26 +0200 Subject: gpiolib: Remove unused of_mm_gpiochip_add() of_mm_gpiochip_add() is unused API, remove it for good. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index e27a9187c0c6..935225caf70d 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -56,11 +56,6 @@ extern int of_get_named_gpio(const struct device_node *np, extern int of_mm_gpiochip_add_data(struct device_node *np, struct of_mm_gpio_chip *mm_gc, void *data); -static inline int of_mm_gpiochip_add(struct device_node *np, - struct of_mm_gpio_chip *mm_gc) -{ - return of_mm_gpiochip_add_data(np, mm_gc, NULL); -} extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc); #else /* CONFIG_OF_GPIO */ -- cgit v1.2.3 From a5ec171efdc6151d3a51c4e1a59abb3ab9d8b710 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 15:39:42 +0200 Subject: gpio: Remove unused and obsoleted irq_to_gpio() irq_to_gpio() is legacy and unused API, remove it for good. This leaves gpio_to_irq() as it's used yet in many places. Nevertheless, removal of its counterpart is a good signal to whoever even trying to consider using them that do not. Signed-off-by: Andy Shevchenko Acked-by: Geert Uytterhoeven Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index e94815b3ce1d..85beb236c925 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -81,11 +81,6 @@ static inline int gpio_to_irq(unsigned int gpio) return __gpio_to_irq(gpio); } -static inline int irq_to_gpio(unsigned int irq) -{ - return -EINVAL; -} - #endif /* ! CONFIG_ARCH_HAVE_CUSTOM_GPIO_H */ /* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */ @@ -210,13 +205,6 @@ static inline int gpio_to_irq(unsigned gpio) return -EINVAL; } -static inline int irq_to_gpio(unsigned irq) -{ - /* irq can never have been returned from gpio_to_irq() */ - WARN_ON(1); - return -EINVAL; -} - static inline int devm_gpio_request(struct device *dev, unsigned gpio, const char *label) { -- cgit v1.2.3 From 91a0192e90e9df40d4b63c4ce11126114ed5d79d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 16 Jan 2023 14:47:02 +0200 Subject: gpio: pcf857x: Get rid of legacy platform data Platform data is a legacy interface to supply device properties to the driver. In this case we don't have in-kernel users for it. Moreover it uses plain GPIO numbers which is no-no for a new code. Just remove it for good. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/pcf857x.h | 45 ----------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 include/linux/platform_data/pcf857x.h (limited to 'include/linux') diff --git a/include/linux/platform_data/pcf857x.h b/include/linux/platform_data/pcf857x.h deleted file mode 100644 index 01d0a3ea3aef..000000000000 --- a/include/linux/platform_data/pcf857x.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_PCF857X_H -#define __LINUX_PCF857X_H - -/** - * struct pcf857x_platform_data - data to set up pcf857x driver - * @gpio_base: number of the chip's first GPIO - * @n_latch: optional bit-inverse of initial register value; if - * you leave this initialized to zero the driver will act - * like the chip was just reset - * @setup: optional callback issued once the GPIOs are valid - * @teardown: optional callback issued before the GPIOs are invalidated - * @context: optional parameter passed to setup() and teardown() - * - * In addition to the I2C_BOARD_INFO() state appropriate to each chip, - * the i2c_board_info used with the pcf875x driver must provide its - * platform_data (pointer to one of these structures) with at least - * the gpio_base value initialized. - * - * The @setup callback may be used with the kind of board-specific glue - * which hands the (now-valid) GPIOs to other drivers, or which puts - * devices in their initial states using these GPIOs. - * - * These GPIO chips are only "quasi-bidirectional"; read the chip specs - * to understand the behavior. They don't have separate registers to - * record which pins are used for input or output, record which output - * values are driven, or provide access to input values. That must be - * inferred by reading the chip's value and knowing the last value written - * to it. If you leave n_latch initialized to zero, that last written - * value is presumed to be all ones (as if the chip were just reset). - */ -struct pcf857x_platform_data { - unsigned gpio_base; - unsigned n_latch; - - int (*setup)(struct i2c_client *client, - int gpio, unsigned ngpio, - void *context); - void (*teardown)(struct i2c_client *client, - int gpio, unsigned ngpio, - void *context); - void *context; -}; - -#endif /* __LINUX_PCF857X_H */ -- cgit v1.2.3 From eed5a3bfafe6840494f7752b5cecd2a610b54fef Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 16:51:40 +0200 Subject: gpiolib: of: Move enum of_gpio_flags to its only user GPIO library for OF is the only user for enum of_gpio_flags. Move it there. Signed-off-by: Andy Shevchenko Acked-by: Rob Herring Reviewed-by: Arnd Bergmann Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 935225caf70d..5d58b3b0a97e 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -17,21 +17,6 @@ struct device_node; -/* - * This is Linux-specific flags. By default controllers' and Linux' mapping - * match, but GPIO controllers are free to translate their own flags to - * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended. - */ -enum of_gpio_flags { - OF_GPIO_ACTIVE_LOW = 0x1, - OF_GPIO_SINGLE_ENDED = 0x2, - OF_GPIO_OPEN_DRAIN = 0x4, - OF_GPIO_TRANSITORY = 0x8, - OF_GPIO_PULL_UP = 0x10, - OF_GPIO_PULL_DOWN = 0x20, - OF_GPIO_PULL_DISABLE = 0x40, -}; - #ifdef CONFIG_OF_GPIO #include -- cgit v1.2.3 From dbd7a2a941b8cbf9e5f79a777ed9fe0090eebb61 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 23 Jan 2023 07:37:45 -0800 Subject: PM / devfreq: Fix build issues with devfreq disabled The existing no-op shims for when PM_DEVFREQ (or an individual governor) only do half the job. The governor specific config/tuning structs need to be available to avoid compile errors in drivers using devfreq. Fixes: 6563f60f14cb ("drm/msm/gpu: Add devfreq tuning debugfs") Signed-off-by: Rob Clark Acked-by: MyungJoo Ham Acked-by: Chanwoo Choi Patchwork: https://patchwork.freedesktop.org/patch/519801/ Link: https://lore.kernel.org/r/20230123153745.3185032-1-robdclark@gmail.com --- include/linux/devfreq.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 4dc7cda4fd46..7fd704bb8f3d 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -273,8 +273,8 @@ void devm_devfreq_unregister_notifier(struct device *dev, struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node); struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, const char *phandle_name, int index); +#endif /* CONFIG_PM_DEVFREQ */ -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** * struct devfreq_simple_ondemand_data - ``void *data`` fed to struct devfreq * and devfreq_add_device @@ -292,9 +292,7 @@ struct devfreq_simple_ondemand_data { unsigned int upthreshold; unsigned int downdifferential; }; -#endif -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) enum devfreq_parent_dev_type { DEVFREQ_PARENT_DEV, CPUFREQ_PARENT_DEV, @@ -337,9 +335,8 @@ struct devfreq_passive_data { struct notifier_block nb; struct list_head cpu_data_list; }; -#endif -#else /* !CONFIG_PM_DEVFREQ */ +#if !defined(CONFIG_PM_DEVFREQ) static inline struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, const char *governor_name, -- cgit v1.2.3 From b3c650ad9bb88ecf36b9aeacf9e7eb7478258da7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 21 Nov 2022 17:15:41 +0100 Subject: pwm: Move pwm_capture() dummy to restore order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the dummy pwm_capture(), to make the declaration order of all dummies to match the declaration order of the real functions. Signed-off-by: Geert Uytterhoeven Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 161e91167b9c..7b7b93b6fb81 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -440,13 +440,6 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, return -EINVAL; } -static inline int pwm_capture(struct pwm_device *pwm, - struct pwm_capture *result, - unsigned long timeout) -{ - return -EINVAL; -} - static inline int pwm_enable(struct pwm_device *pwm) { might_sleep(); @@ -458,6 +451,13 @@ static inline void pwm_disable(struct pwm_device *pwm) might_sleep(); } +static inline int pwm_capture(struct pwm_device *pwm, + struct pwm_capture *result, + unsigned long timeout) +{ + return -EINVAL; +} + static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) { return -EINVAL; -- cgit v1.2.3 From 724ef01569519d1e7a95231688b6a5f8eaba29f2 Mon Sep 17 00:00:00 2001 From: Mario Kicherer Date: Thu, 26 Jan 2023 15:40:50 +0100 Subject: mtd: spinand: Add support for AllianceMemory AS5F34G04SND Add support for AllianceMemory AS5F34G04SND SPI NAND flash Datasheet: - https://www.alliancememory.com/wp-content/uploads/pdf/flash/AllianceMemory_SPI_NAND_Flash_July2020_Rev1.0.pdf Signed-off-by: Mario Kicherer Reviewed-by: Dhruva Gole Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230126144050.2656358-1-dev@kicherer.org --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 6d3392a7edc6..01be9f0f008a 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -260,6 +260,7 @@ struct spinand_manufacturer { }; /* SPI NAND manufacturers */ +extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; -- cgit v1.2.3 From 246a1c4c6b7ffba88a2553d2b88f7b6280f253a2 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 23 Jan 2023 20:09:54 +0100 Subject: ata: pata_parport: add driver (PARIDE replacement) The pata_parport is a libata-based replacement of the old PARIDE subsystem - driver for parallel port IDE devices. It uses the original paride low-level protocol drivers but does not need the high-level drivers (pd, pcd, pf, pt, pg). The IDE devices behind parallel port adapters are handled by the ATA layer. This will allow paride and its high-level drivers to be removed. Unfortunately, libata drivers cannot sleep so pata_parport claims parport before activating the ata host and keeps it claimed (and protocol connected) until the ata host is removed. This means that no devices can be chained (neither other pata_parport devices nor a printer). paride and pata_parport are mutually exclusive because the compiled protocol drivers are incompatible. Tested with: - Imation SuperDisk LS-120 and HP C4381A (EPAT) - Freecom Parallel CD (FRPW) - Toshiba Mobile CD-RW 2793008 w/Freecom Parallel Cable rev.903 (FRIQ) - Backpack CD-RW 222011 and CD-RW 19350 (BPCK6) The following bugs in low-level protocol drivers were found and will be fixed later: Note: EPP-32 mode is buggy in EPAT - and also in all other protocol drivers - they don't handle non-multiple-of-4 block transfers correctly. This causes problems with LS-120 drive. There is also another bug in EPAT: EPP modes don't work unless a 4-bit or 8-bit mode is used first (probably some initialization missing?). Once the device is initialized, EPP works until power cycle. So after device power on, you have to: echo "parport0 epat 0" >/sys/bus/pata_parport/new_device echo pata_parport.0 >/sys/bus/pata_parport/delete_device echo "parport0 epat 4" >/sys/bus/pata_parport/new_device (autoprobe will initialize correctly as it tries the slowest modes first but you'll get the broken EPP-32 mode) Note: EPP modes are buggy in FRPW, only modes 0 and 1 work. Signed-off-by: Ondrej Zary Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Jens Axboe Signed-off-by: Damien Le Moal --- include/linux/pata_parport.h | 111 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 include/linux/pata_parport.h (limited to 'include/linux') diff --git a/include/linux/pata_parport.h b/include/linux/pata_parport.h new file mode 100644 index 000000000000..58781846f282 --- /dev/null +++ b/include/linux/pata_parport.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * pata_parport.h (c) 1997-8 Grant R. Guenther + * Under the terms of the GPL. + * + * This file defines the interface for parallel port IDE adapter chip drivers. + */ + +#ifndef LINUX_PATA_PARPORT_H +#define LINUX_PATA_PARPORT_H + +#include + +#define PI_PCD 1 /* dummy for paride protocol modules */ + +struct pi_adapter { + struct device dev; + struct pi_protocol *proto; /* adapter protocol */ + int port; /* base address of parallel port */ + int mode; /* transfer mode in use */ + int delay; /* adapter delay setting */ + int devtype; /* dummy for paride protocol modules */ + char *device; /* dummy for paride protocol modules */ + int unit; /* unit number for chained adapters */ + int saved_r0; /* saved port state */ + int saved_r2; /* saved port state */ + unsigned long private; /* for protocol module */ + struct pardevice *pardev; /* pointer to pardevice */ +}; + +typedef struct pi_adapter PIA; /* for paride protocol modules */ + +/* registers are addressed as (cont,regr) + * cont: 0 for command register file, 1 for control register(s) + * regr: 0-7 for register number. + */ + +/* macros and functions exported to the protocol modules */ +#define delay_p (pi->delay ? udelay(pi->delay) : (void)0) +#define out_p(offs, byte) do { outb(byte, pi->port + offs); delay_p; } while (0) +#define in_p(offs) (delay_p, inb(pi->port + offs)) + +#define w0(byte) out_p(0, byte) +#define r0() in_p(0) +#define w1(byte) out_p(1, byte) +#define r1() in_p(1) +#define w2(byte) out_p(2, byte) +#define r2() in_p(2) +#define w3(byte) out_p(3, byte) +#define w4(byte) out_p(4, byte) +#define r4() in_p(4) +#define w4w(data) do { outw(data, pi->port + 4); delay_p; } while (0) +#define w4l(data) do { outl(data, pi->port + 4); delay_p; } while (0) +#define r4w() (delay_p, inw(pi->port + 4)) +#define r4l() (delay_p, inl(pi->port + 4)) + +static inline u16 pi_swab16(char *b, int k) +{ + union { u16 u; char t[2]; } r; + + r.t[0] = b[2 * k + 1]; r.t[1] = b[2 * k]; + return r.u; +} + +static inline u32 pi_swab32(char *b, int k) +{ + union { u32 u; char f[4]; } r; + + r.f[0] = b[4 * k + 1]; r.f[1] = b[4 * k]; + r.f[2] = b[4 * k + 3]; r.f[3] = b[4 * k + 2]; + return r.u; +} + +struct pi_protocol { + char name[8]; + + int max_mode; + int epp_first; /* modes >= this use 8 ports */ + + int default_delay; + int max_units; /* max chained units probed for */ + + void (*write_regr)(struct pi_adapter *pi, int cont, int regr, int val); + int (*read_regr)(struct pi_adapter *pi, int cont, int regr); + void (*write_block)(struct pi_adapter *pi, char *buf, int count); + void (*read_block)(struct pi_adapter *pi, char *buf, int count); + + void (*connect)(struct pi_adapter *pi); + void (*disconnect)(struct pi_adapter *pi); + + int (*test_port)(struct pi_adapter *pi); + int (*probe_unit)(struct pi_adapter *pi); + int (*test_proto)(struct pi_adapter *pi, char *scratch, int verbose); + void (*log_adapter)(struct pi_adapter *pi, char *scratch, int verbose); + + int (*init_proto)(struct pi_adapter *pi); + void (*release_proto)(struct pi_adapter *pi); + struct module *owner; + struct device_driver driver; + struct scsi_host_template sht; +}; + +#define PATA_PARPORT_SHT ATA_PIO_SHT + +int pata_parport_register_driver(struct pi_protocol *pr); +void pata_parport_unregister_driver(struct pi_protocol *pr); +/* defines for old paride protocol modules */ +#define paride_register pata_parport_register_driver +#define paride_unregister pata_parport_unregister_driver + +#endif /* LINUX_PATA_PARPORT_H */ -- cgit v1.2.3 From 4744c7ad2299108bc3ecbea574cffb052863ba26 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 6 Jul 2022 13:06:30 +0000 Subject: net/mlx5: Add IFC bits for general obj create param Before this patch, the log_obj_range was defined inside general_obj_in_cmd_hdr to support bulk allocation. However, we need to modify/query one of the object in the bulk in later patch, so change those fields to param bits for parameters specific for cmd header, and add general_obj_create_param according to what was updated in spec. We will also add general_obj_query_param for modify/query later. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0b102c651fe2..17e293ceb625 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6196,6 +6196,13 @@ struct mlx5_ifc_match_definer_bits { }; }; +struct mlx5_ifc_general_obj_create_param_bits { + u8 alias_object[0x1]; + u8 reserved_at_1[0x2]; + u8 log_obj_range[0x5]; + u8 reserved_at_8[0x18]; +}; + struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 opcode[0x10]; u8 uid[0x10]; @@ -6205,9 +6212,7 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 obj_id[0x20]; - u8 reserved_at_60[0x3]; - u8 log_obj_range[0x5]; - u8 reserved_at_68[0x18]; + struct mlx5_ifc_general_obj_create_param_bits op_param; }; struct mlx5_ifc_general_obj_out_cmd_hdr_bits { -- cgit v1.2.3 From 9a0ed4f2bfe220e9389cc86d7f9b284119037eb8 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 17 Feb 2022 09:29:35 +0000 Subject: net/mlx5: Add IFC bits and enums for crypto key Add and extend structure layouts and defines for fast crypto key update. This is a prerequisite to support bulk creation, key modification and destruction, software wrapped DEK, and SYNC_CRYPTO command. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 146 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 140 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 17e293ceb625..7143b65f9f4a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -306,6 +306,7 @@ enum { MLX5_CMD_OP_SYNC_STEERING = 0xb00, MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, + MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, MLX5_CMD_OP_MAX }; @@ -1112,6 +1113,30 @@ struct mlx5_ifc_sync_steering_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_sync_crypto_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 crypto_type[0x10]; + + u8 reserved_at_80[0x80]; +}; + +struct mlx5_ifc_sync_crypto_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_device_mem_cap_bits { u8 memic[0x1]; u8 reserved_at_1[0x1f]; @@ -1768,7 +1793,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ats[0x1]; u8 reserved_at_462[0x1]; u8 log_max_uctx[0x5]; - u8 reserved_at_468[0x2]; + u8 reserved_at_468[0x1]; + u8 crypto[0x1]; u8 ipsec_offload[0x1]; u8 log_max_umem[0x5]; u8 max_num_eqs[0x10]; @@ -3351,6 +3377,30 @@ struct mlx5_ifc_shampo_cap_bits { u8 reserved_at_40[0x7c0]; }; +struct mlx5_ifc_crypto_cap_bits { + u8 reserved_at_0[0x3]; + u8 synchronize_dek[0x1]; + u8 int_kek_manual[0x1]; + u8 int_kek_auto[0x1]; + u8 reserved_at_6[0x1a]; + + u8 reserved_at_20[0x3]; + u8 log_dek_max_alloc[0x5]; + u8 reserved_at_28[0x3]; + u8 log_max_num_deks[0x5]; + u8 reserved_at_30[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x3]; + u8 log_dek_granularity[0x5]; + u8 reserved_at_68[0x3]; + u8 log_max_num_int_kek[0x5]; + u8 sw_wrapped_dek[0x10]; + + u8 reserved_at_80[0x780]; +}; + union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2; @@ -3371,6 +3421,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; struct mlx5_ifc_shampo_cap_bits shampo_cap; struct mlx5_ifc_macsec_cap_bits macsec_cap; + struct mlx5_ifc_crypto_cap_bits crypto_cap; u8 reserved_at_0[0x8000]; }; @@ -6203,6 +6254,11 @@ struct mlx5_ifc_general_obj_create_param_bits { u8 reserved_at_8[0x18]; }; +struct mlx5_ifc_general_obj_query_param_bits { + u8 alias_object[0x1]; + u8 obj_offset[0x1f]; +}; + struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 opcode[0x10]; u8 uid[0x10]; @@ -6212,7 +6268,10 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 obj_id[0x20]; - struct mlx5_ifc_general_obj_create_param_bits op_param; + union { + struct mlx5_ifc_general_obj_create_param_bits create; + struct mlx5_ifc_general_obj_query_param_bits query; + } op_param; }; struct mlx5_ifc_general_obj_out_cmd_hdr_bits { @@ -11707,6 +11766,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, + MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, }; enum { @@ -11886,10 +11946,44 @@ struct mlx5_ifc_query_macsec_obj_out_bits { struct mlx5_ifc_macsec_offload_obj_bits macsec_object; }; +struct mlx5_ifc_wrapped_dek_bits { + u8 gcm_iv[0x60]; + + u8 reserved_at_60[0x20]; + + u8 const0[0x1]; + u8 key_size[0x1]; + u8 reserved_at_82[0x2]; + u8 key2_invalid[0x1]; + u8 reserved_at_85[0x3]; + u8 pd[0x18]; + + u8 key_purpose[0x5]; + u8 reserved_at_a5[0x13]; + u8 kek_id[0x8]; + + u8 reserved_at_c0[0x40]; + + u8 key1[0x8][0x20]; + + u8 key2[0x8][0x20]; + + u8 reserved_at_300[0x40]; + + u8 const1[0x1]; + u8 reserved_at_341[0x1f]; + + u8 reserved_at_360[0x20]; + + u8 auth_tag[0x80]; +}; + struct mlx5_ifc_encryption_key_obj_bits { u8 modify_field_select[0x40]; - u8 reserved_at_40[0x14]; + u8 state[0x8]; + u8 sw_wrapped[0x1]; + u8 reserved_at_49[0xb]; u8 key_size[0x4]; u8 reserved_at_58[0x4]; u8 key_type[0x4]; @@ -11897,10 +11991,17 @@ struct mlx5_ifc_encryption_key_obj_bits { u8 reserved_at_60[0x8]; u8 pd[0x18]; - u8 reserved_at_80[0x180]; - u8 key[8][0x20]; + u8 reserved_at_80[0x100]; + + u8 opaque[0x40]; + + u8 reserved_at_1c0[0x40]; - u8 reserved_at_300[0x500]; + u8 key[8][0x80]; + + u8 sw_wrapped_dek[8][0x80]; + + u8 reserved_at_a00[0x600]; }; struct mlx5_ifc_create_encryption_key_in_bits { @@ -11908,6 +12009,11 @@ struct mlx5_ifc_create_encryption_key_in_bits { struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; }; +struct mlx5_ifc_modify_encryption_key_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; +}; + enum { MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH = 0x0, MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2 = 0x1, @@ -11963,6 +12069,34 @@ struct mlx5_ifc_create_flow_meter_aso_obj_in_bits { struct mlx5_ifc_flow_meter_aso_obj_bits flow_meter_aso_obj; }; +struct mlx5_ifc_int_kek_obj_bits { + u8 modify_field_select[0x40]; + + u8 state[0x8]; + u8 auto_gen[0x1]; + u8 reserved_at_49[0xb]; + u8 key_size[0x4]; + u8 reserved_at_58[0x8]; + + u8 reserved_at_60[0x8]; + u8 pd[0x18]; + + u8 reserved_at_80[0x180]; + u8 key[8][0x80]; + + u8 reserved_at_600[0x200]; +}; + +struct mlx5_ifc_create_int_kek_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_int_kek_obj_bits int_kek_object; +}; + +struct mlx5_ifc_create_int_kek_obj_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + struct mlx5_ifc_int_kek_obj_bits int_kek_object; +}; + struct mlx5_ifc_sampler_obj_bits { u8 modify_field_select[0x40]; -- cgit v1.2.3 From 60c8972d2cccba19ce5a993bb8221e609702ef0d Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 22 Nov 2022 06:09:22 +0000 Subject: net/mlx5: Change key type to key purpose Change the naming of key type in DEK fields and macros, to be consistent with the device spec. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7143b65f9f4a..1b6201bb04c1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11986,7 +11986,7 @@ struct mlx5_ifc_encryption_key_obj_bits { u8 reserved_at_49[0xb]; u8 key_size[0x4]; u8 reserved_at_58[0x4]; - u8 key_type[0x4]; + u8 key_purpose[0x4]; u8 reserved_at_60[0x8]; u8 pd[0x18]; @@ -12135,9 +12135,9 @@ enum { }; enum { - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1, - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2, - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC = 0x4, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC = 0x2, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC = 0x4, }; struct mlx5_ifc_tls_static_params_bits { -- cgit v1.2.3 From fe298bdf6f654d4ad93069db9fe93b3b6632f4f1 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 23 May 2022 04:10:02 +0000 Subject: net/mlx5: Prepare for fast crypto key update if hardware supports it Add CAP for crypto offload, do the simple initialization if hardware supports it. Currently set log_dek_obj_range to 12, so 4k DEKs will be created in one bulk allocation. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 29d4b201c7b2..bc531bd9804f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1204,6 +1204,7 @@ enum mlx5_cap_type { MLX5_CAP_VDPA_EMULATION = 0x13, MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, + MLX5_CAP_CRYPTO = 0x1a, MLX5_CAP_DEV_SHAMPO = 0x1d, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, @@ -1460,6 +1461,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_IPSEC(mdev, cap)\ MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap) +#define MLX5_CAP_CRYPTO(mdev, cap)\ + MLX5_GET(crypto_cap, (mdev)->caps.hca[MLX5_CAP_CRYPTO]->cur, cap) + #define MLX5_CAP_DEV_SHAMPO(mdev, cap)\ MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 44167760ff29..cd529e051b4d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -516,6 +516,7 @@ struct mlx5_vhca_state_notifier; struct mlx5_sf_dev_table; struct mlx5_sf_hw_table; struct mlx5_sf_table; +struct mlx5_crypto_dek_priv; struct mlx5_rate_limit { u32 rate; @@ -673,6 +674,7 @@ struct mlx5e_resources { } hw_objs; struct devlink_port dl_port; struct net_device *uplink_netdev; + struct mlx5_crypto_dek_priv *dek_priv; }; enum mlx5_sw_icm_type { -- cgit v1.2.3 From 6755dee8343cbc4af45e001d904c9a857a451bec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Jan 2023 17:15:59 +0100 Subject: cc2520: move to gpio descriptors cc2520 supports both probing from static platform_data and from devicetree, but there have never been any definitions of the platform data in the mainline kernel, so it's safe to assume that only the DT path is used. After folding cc2520_platform_data into the driver itself, the GPIO handling can be simplified by moving to the modern gpiod interface. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230126161658.2983292-1-arnd@kernel.org Signed-off-by: Stefan Schmidt --- include/linux/spi/cc2520.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/spi/cc2520.h (limited to 'include/linux') diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h deleted file mode 100644 index 449bacf10700..000000000000 --- a/include/linux/spi/cc2520.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Header file for cc2520 radio driver - * - * Copyright (C) 2014 Varka Bhadram - * Md.Jamal Mohiuddin - * P Sowjanya - */ - -#ifndef __CC2520_H -#define __CC2520_H - -struct cc2520_platform_data { - int fifo; - int fifop; - int cca; - int sfd; - int reset; - int vreg; -}; - -#endif -- cgit v1.2.3 From e1d5d71d189f290343fb1f18eecf77335c5d1ef3 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:53 +0100 Subject: usb: uvc: move media/v4l2-uvc.h to usb/uvc.h Since the headerfile is only used in usb devices it is better placed with the other usb files. Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20230126231456.3402323-3-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 367 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 include/linux/usb/uvc.h (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h new file mode 100644 index 000000000000..b010a36fc1d9 --- /dev/null +++ b/include/linux/usb/uvc.h @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * v4l2 uvc internal API header + * + * Some commonly needed functions for uvc drivers + */ + +#ifndef __LINUX_V4L2_UVC_H +#define __LINUX_V4L2_UVC_H + +/* ------------------------------------------------------------------------ + * GUIDs + */ +#define UVC_GUID_UVC_CAMERA \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} +#define UVC_GUID_UVC_OUTPUT \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02} +#define UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} +#define UVC_GUID_UVC_PROCESSING \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01} +#define UVC_GUID_UVC_SELECTOR \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02} +#define UVC_GUID_EXT_GPIO_CONTROLLER \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03} + +#define UVC_GUID_FORMAT_MJPEG \ + { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YUY2 \ + { 'Y', 'U', 'Y', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YUY2_ISIGHT \ + { 'Y', 'U', 'Y', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_NV12 \ + { 'N', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YV12 \ + { 'Y', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_I420 \ + { 'I', '4', '2', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_UYVY \ + { 'U', 'Y', 'V', 'Y', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y800 \ + { 'Y', '8', '0', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y8 \ + { 'Y', '8', ' ', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y10 \ + { 'Y', '1', '0', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y12 \ + { 'Y', '1', '2', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y16 \ + { 'Y', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BY8 \ + { 'B', 'Y', '8', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BA81 \ + { 'B', 'A', '8', '1', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GBRG \ + { 'G', 'B', 'R', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GRBG \ + { 'G', 'R', 'B', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RGGB \ + { 'R', 'G', 'G', 'B', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BG16 \ + { 'B', 'G', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GB16 \ + { 'G', 'B', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RG16 \ + { 'R', 'G', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GR16 \ + { 'G', 'R', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RGBP \ + { 'R', 'G', 'B', 'P', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BGR3 \ + { 0x7d, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} +#define UVC_GUID_FORMAT_BGR4 \ + { 0x7e, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} +#define UVC_GUID_FORMAT_M420 \ + { 'M', '4', '2', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +#define UVC_GUID_FORMAT_H264 \ + { 'H', '2', '6', '4', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_H265 \ + { 'H', '2', '6', '5', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y8I \ + { 'Y', '8', 'I', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y12I \ + { 'Y', '1', '2', 'I', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Z16 \ + { 'Z', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RW10 \ + { 'R', 'W', '1', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_INVZ \ + { 'I', 'N', 'V', 'Z', 0x90, 0x2d, 0x58, 0x4a, \ + 0x92, 0x0b, 0x77, 0x3f, 0x1f, 0x2c, 0x55, 0x6b} +#define UVC_GUID_FORMAT_INZI \ + { 'I', 'N', 'Z', 'I', 0x66, 0x1a, 0x42, 0xa2, \ + 0x90, 0x65, 0xd0, 0x18, 0x14, 0xa8, 0xef, 0x8a} +#define UVC_GUID_FORMAT_INVI \ + { 'I', 'N', 'V', 'I', 0xdb, 0x57, 0x49, 0x5e, \ + 0x8e, 0x3f, 0xf4, 0x79, 0x53, 0x2b, 0x94, 0x6f} +#define UVC_GUID_FORMAT_CNF4 \ + { 'C', ' ', ' ', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +#define UVC_GUID_FORMAT_D3DFMT_L8 \ + {0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_KSMEDIA_L8_IR \ + {0x32, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +#define UVC_GUID_FORMAT_HEVC \ + { 'H', 'E', 'V', 'C', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +/* ------------------------------------------------------------------------ + * Video formats + */ + +struct uvc_format_desc { + char *name; + u8 guid[16]; + u32 fcc; +}; + +static struct uvc_format_desc uvc_fmts[] = { + { + .name = "YUV 4:2:2 (YUYV)", + .guid = UVC_GUID_FORMAT_YUY2, + .fcc = V4L2_PIX_FMT_YUYV, + }, + { + .name = "YUV 4:2:2 (YUYV)", + .guid = UVC_GUID_FORMAT_YUY2_ISIGHT, + .fcc = V4L2_PIX_FMT_YUYV, + }, + { + .name = "YUV 4:2:0 (NV12)", + .guid = UVC_GUID_FORMAT_NV12, + .fcc = V4L2_PIX_FMT_NV12, + }, + { + .name = "MJPEG", + .guid = UVC_GUID_FORMAT_MJPEG, + .fcc = V4L2_PIX_FMT_MJPEG, + }, + { + .name = "YVU 4:2:0 (YV12)", + .guid = UVC_GUID_FORMAT_YV12, + .fcc = V4L2_PIX_FMT_YVU420, + }, + { + .name = "YUV 4:2:0 (I420)", + .guid = UVC_GUID_FORMAT_I420, + .fcc = V4L2_PIX_FMT_YUV420, + }, + { + .name = "YUV 4:2:0 (M420)", + .guid = UVC_GUID_FORMAT_M420, + .fcc = V4L2_PIX_FMT_M420, + }, + { + .name = "YUV 4:2:2 (UYVY)", + .guid = UVC_GUID_FORMAT_UYVY, + .fcc = V4L2_PIX_FMT_UYVY, + }, + { + .name = "Greyscale 8-bit (Y800)", + .guid = UVC_GUID_FORMAT_Y800, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "Greyscale 8-bit (Y8 )", + .guid = UVC_GUID_FORMAT_Y8, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "Greyscale 8-bit (D3DFMT_L8)", + .guid = UVC_GUID_FORMAT_D3DFMT_L8, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "IR 8-bit (L8_IR)", + .guid = UVC_GUID_FORMAT_KSMEDIA_L8_IR, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "Greyscale 10-bit (Y10 )", + .guid = UVC_GUID_FORMAT_Y10, + .fcc = V4L2_PIX_FMT_Y10, + }, + { + .name = "Greyscale 12-bit (Y12 )", + .guid = UVC_GUID_FORMAT_Y12, + .fcc = V4L2_PIX_FMT_Y12, + }, + { + .name = "Greyscale 16-bit (Y16 )", + .guid = UVC_GUID_FORMAT_Y16, + .fcc = V4L2_PIX_FMT_Y16, + }, + { + .name = "BGGR Bayer (BY8 )", + .guid = UVC_GUID_FORMAT_BY8, + .fcc = V4L2_PIX_FMT_SBGGR8, + }, + { + .name = "BGGR Bayer (BA81)", + .guid = UVC_GUID_FORMAT_BA81, + .fcc = V4L2_PIX_FMT_SBGGR8, + }, + { + .name = "GBRG Bayer (GBRG)", + .guid = UVC_GUID_FORMAT_GBRG, + .fcc = V4L2_PIX_FMT_SGBRG8, + }, + { + .name = "GRBG Bayer (GRBG)", + .guid = UVC_GUID_FORMAT_GRBG, + .fcc = V4L2_PIX_FMT_SGRBG8, + }, + { + .name = "RGGB Bayer (RGGB)", + .guid = UVC_GUID_FORMAT_RGGB, + .fcc = V4L2_PIX_FMT_SRGGB8, + }, + { + .name = "RGB565", + .guid = UVC_GUID_FORMAT_RGBP, + .fcc = V4L2_PIX_FMT_RGB565, + }, + { + .name = "BGR 8:8:8 (BGR3)", + .guid = UVC_GUID_FORMAT_BGR3, + .fcc = V4L2_PIX_FMT_BGR24, + }, + { + .name = "BGRA/X 8:8:8:8 (BGR4)", + .guid = UVC_GUID_FORMAT_BGR4, + .fcc = V4L2_PIX_FMT_XBGR32, + }, + { + .name = "H.264", + .guid = UVC_GUID_FORMAT_H264, + .fcc = V4L2_PIX_FMT_H264, + }, + { + .name = "H.265", + .guid = UVC_GUID_FORMAT_H265, + .fcc = V4L2_PIX_FMT_HEVC, + }, + { + .name = "Greyscale 8 L/R (Y8I)", + .guid = UVC_GUID_FORMAT_Y8I, + .fcc = V4L2_PIX_FMT_Y8I, + }, + { + .name = "Greyscale 12 L/R (Y12I)", + .guid = UVC_GUID_FORMAT_Y12I, + .fcc = V4L2_PIX_FMT_Y12I, + }, + { + .name = "Depth data 16-bit (Z16)", + .guid = UVC_GUID_FORMAT_Z16, + .fcc = V4L2_PIX_FMT_Z16, + }, + { + .name = "Bayer 10-bit (SRGGB10P)", + .guid = UVC_GUID_FORMAT_RW10, + .fcc = V4L2_PIX_FMT_SRGGB10P, + }, + { + .name = "Bayer 16-bit (SBGGR16)", + .guid = UVC_GUID_FORMAT_BG16, + .fcc = V4L2_PIX_FMT_SBGGR16, + }, + { + .name = "Bayer 16-bit (SGBRG16)", + .guid = UVC_GUID_FORMAT_GB16, + .fcc = V4L2_PIX_FMT_SGBRG16, + }, + { + .name = "Bayer 16-bit (SRGGB16)", + .guid = UVC_GUID_FORMAT_RG16, + .fcc = V4L2_PIX_FMT_SRGGB16, + }, + { + .name = "Bayer 16-bit (SGRBG16)", + .guid = UVC_GUID_FORMAT_GR16, + .fcc = V4L2_PIX_FMT_SGRBG16, + }, + { + .name = "Depth data 16-bit (Z16)", + .guid = UVC_GUID_FORMAT_INVZ, + .fcc = V4L2_PIX_FMT_Z16, + }, + { + .name = "Greyscale 10-bit (Y10 )", + .guid = UVC_GUID_FORMAT_INVI, + .fcc = V4L2_PIX_FMT_Y10, + }, + { + .name = "IR:Depth 26-bit (INZI)", + .guid = UVC_GUID_FORMAT_INZI, + .fcc = V4L2_PIX_FMT_INZI, + }, + { + .name = "4-bit Depth Confidence (Packed)", + .guid = UVC_GUID_FORMAT_CNF4, + .fcc = V4L2_PIX_FMT_CNF4, + }, + { + .name = "HEVC", + .guid = UVC_GUID_FORMAT_HEVC, + .fcc = V4L2_PIX_FMT_HEVC, + }, +}; + +static inline struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]) +{ + unsigned int len = ARRAY_SIZE(uvc_fmts); + unsigned int i; + + for (i = 0; i < len; ++i) { + if (memcmp(guid, uvc_fmts[i].guid, 16) == 0) + return &uvc_fmts[i]; + } + + return NULL; +} + +#endif /* __LINUX_V4L2_UVC_H */ -- cgit v1.2.3 From 466be4c9a6f0b7810991b4ac6c3e55345ea63954 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:54 +0100 Subject: usb: uvc: move uvc_fmts and uvc_format_by_guid to own compile unit The media driver USB_VIDEO_CLASS and USB_F_UVC are using the same function uvc_format_by_guid. Since the function is inline, every user will get a copy of the used uvc_fmts array and the function. This patch moves the code to an own compile unit and add this dependency as UVC_COMMON to both users. Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20230126231456.3402323-4-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 210 +----------------------------------------------- 1 file changed, 1 insertion(+), 209 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index b010a36fc1d9..8cebb46602a1 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -148,220 +148,12 @@ { 'H', 'E', 'V', 'C', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -/* ------------------------------------------------------------------------ - * Video formats - */ - struct uvc_format_desc { char *name; u8 guid[16]; u32 fcc; }; -static struct uvc_format_desc uvc_fmts[] = { - { - .name = "YUV 4:2:2 (YUYV)", - .guid = UVC_GUID_FORMAT_YUY2, - .fcc = V4L2_PIX_FMT_YUYV, - }, - { - .name = "YUV 4:2:2 (YUYV)", - .guid = UVC_GUID_FORMAT_YUY2_ISIGHT, - .fcc = V4L2_PIX_FMT_YUYV, - }, - { - .name = "YUV 4:2:0 (NV12)", - .guid = UVC_GUID_FORMAT_NV12, - .fcc = V4L2_PIX_FMT_NV12, - }, - { - .name = "MJPEG", - .guid = UVC_GUID_FORMAT_MJPEG, - .fcc = V4L2_PIX_FMT_MJPEG, - }, - { - .name = "YVU 4:2:0 (YV12)", - .guid = UVC_GUID_FORMAT_YV12, - .fcc = V4L2_PIX_FMT_YVU420, - }, - { - .name = "YUV 4:2:0 (I420)", - .guid = UVC_GUID_FORMAT_I420, - .fcc = V4L2_PIX_FMT_YUV420, - }, - { - .name = "YUV 4:2:0 (M420)", - .guid = UVC_GUID_FORMAT_M420, - .fcc = V4L2_PIX_FMT_M420, - }, - { - .name = "YUV 4:2:2 (UYVY)", - .guid = UVC_GUID_FORMAT_UYVY, - .fcc = V4L2_PIX_FMT_UYVY, - }, - { - .name = "Greyscale 8-bit (Y800)", - .guid = UVC_GUID_FORMAT_Y800, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "Greyscale 8-bit (Y8 )", - .guid = UVC_GUID_FORMAT_Y8, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "Greyscale 8-bit (D3DFMT_L8)", - .guid = UVC_GUID_FORMAT_D3DFMT_L8, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "IR 8-bit (L8_IR)", - .guid = UVC_GUID_FORMAT_KSMEDIA_L8_IR, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "Greyscale 10-bit (Y10 )", - .guid = UVC_GUID_FORMAT_Y10, - .fcc = V4L2_PIX_FMT_Y10, - }, - { - .name = "Greyscale 12-bit (Y12 )", - .guid = UVC_GUID_FORMAT_Y12, - .fcc = V4L2_PIX_FMT_Y12, - }, - { - .name = "Greyscale 16-bit (Y16 )", - .guid = UVC_GUID_FORMAT_Y16, - .fcc = V4L2_PIX_FMT_Y16, - }, - { - .name = "BGGR Bayer (BY8 )", - .guid = UVC_GUID_FORMAT_BY8, - .fcc = V4L2_PIX_FMT_SBGGR8, - }, - { - .name = "BGGR Bayer (BA81)", - .guid = UVC_GUID_FORMAT_BA81, - .fcc = V4L2_PIX_FMT_SBGGR8, - }, - { - .name = "GBRG Bayer (GBRG)", - .guid = UVC_GUID_FORMAT_GBRG, - .fcc = V4L2_PIX_FMT_SGBRG8, - }, - { - .name = "GRBG Bayer (GRBG)", - .guid = UVC_GUID_FORMAT_GRBG, - .fcc = V4L2_PIX_FMT_SGRBG8, - }, - { - .name = "RGGB Bayer (RGGB)", - .guid = UVC_GUID_FORMAT_RGGB, - .fcc = V4L2_PIX_FMT_SRGGB8, - }, - { - .name = "RGB565", - .guid = UVC_GUID_FORMAT_RGBP, - .fcc = V4L2_PIX_FMT_RGB565, - }, - { - .name = "BGR 8:8:8 (BGR3)", - .guid = UVC_GUID_FORMAT_BGR3, - .fcc = V4L2_PIX_FMT_BGR24, - }, - { - .name = "BGRA/X 8:8:8:8 (BGR4)", - .guid = UVC_GUID_FORMAT_BGR4, - .fcc = V4L2_PIX_FMT_XBGR32, - }, - { - .name = "H.264", - .guid = UVC_GUID_FORMAT_H264, - .fcc = V4L2_PIX_FMT_H264, - }, - { - .name = "H.265", - .guid = UVC_GUID_FORMAT_H265, - .fcc = V4L2_PIX_FMT_HEVC, - }, - { - .name = "Greyscale 8 L/R (Y8I)", - .guid = UVC_GUID_FORMAT_Y8I, - .fcc = V4L2_PIX_FMT_Y8I, - }, - { - .name = "Greyscale 12 L/R (Y12I)", - .guid = UVC_GUID_FORMAT_Y12I, - .fcc = V4L2_PIX_FMT_Y12I, - }, - { - .name = "Depth data 16-bit (Z16)", - .guid = UVC_GUID_FORMAT_Z16, - .fcc = V4L2_PIX_FMT_Z16, - }, - { - .name = "Bayer 10-bit (SRGGB10P)", - .guid = UVC_GUID_FORMAT_RW10, - .fcc = V4L2_PIX_FMT_SRGGB10P, - }, - { - .name = "Bayer 16-bit (SBGGR16)", - .guid = UVC_GUID_FORMAT_BG16, - .fcc = V4L2_PIX_FMT_SBGGR16, - }, - { - .name = "Bayer 16-bit (SGBRG16)", - .guid = UVC_GUID_FORMAT_GB16, - .fcc = V4L2_PIX_FMT_SGBRG16, - }, - { - .name = "Bayer 16-bit (SRGGB16)", - .guid = UVC_GUID_FORMAT_RG16, - .fcc = V4L2_PIX_FMT_SRGGB16, - }, - { - .name = "Bayer 16-bit (SGRBG16)", - .guid = UVC_GUID_FORMAT_GR16, - .fcc = V4L2_PIX_FMT_SGRBG16, - }, - { - .name = "Depth data 16-bit (Z16)", - .guid = UVC_GUID_FORMAT_INVZ, - .fcc = V4L2_PIX_FMT_Z16, - }, - { - .name = "Greyscale 10-bit (Y10 )", - .guid = UVC_GUID_FORMAT_INVI, - .fcc = V4L2_PIX_FMT_Y10, - }, - { - .name = "IR:Depth 26-bit (INZI)", - .guid = UVC_GUID_FORMAT_INZI, - .fcc = V4L2_PIX_FMT_INZI, - }, - { - .name = "4-bit Depth Confidence (Packed)", - .guid = UVC_GUID_FORMAT_CNF4, - .fcc = V4L2_PIX_FMT_CNF4, - }, - { - .name = "HEVC", - .guid = UVC_GUID_FORMAT_HEVC, - .fcc = V4L2_PIX_FMT_HEVC, - }, -}; - -static inline struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]) -{ - unsigned int len = ARRAY_SIZE(uvc_fmts); - unsigned int i; - - for (i = 0; i < len; ++i) { - if (memcmp(guid, uvc_fmts[i].guid, 16) == 0) - return &uvc_fmts[i]; - } - - return NULL; -} +struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]); #endif /* __LINUX_V4L2_UVC_H */ -- cgit v1.2.3 From 8ecb17a86c0fbb86ea9fb4fa26e742600e945794 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:55 +0100 Subject: usb: uvc: make uvc_format_desc table const Since the uvc_fmts array can not be modified we declare it const and change every user of the uvc_format_by_guid function aswell. Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20230126231456.3402323-5-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 8cebb46602a1..b0210c5c5406 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -154,6 +154,6 @@ struct uvc_format_desc { u32 fcc; }; -struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]); +const struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]); #endif /* __LINUX_V4L2_UVC_H */ -- cgit v1.2.3 From 2d83eb5d24e1c8dba386928fcbf76d3b581a632d Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:56 +0100 Subject: usb: uvc: use v4l2_fill_fmtdesc instead of open coded format name Since v4l2_fill_fmtdesc will be called in the ioctl v4l_enum_fmt anyway. We can set the format description and compressed flag from v4l_fill_fmtdesc and can remove the extra name field in uvc_format_desc. Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20230126231456.3402323-6-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index b0210c5c5406..88d96095bcb1 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -149,7 +149,6 @@ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} struct uvc_format_desc { - char *name; u8 guid[16]; u32 fcc; }; -- cgit v1.2.3 From 2bf40502badf9bc15a487244dd23ce0b08c306c0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 25 Jan 2023 16:34:25 +0200 Subject: usb: gadget: Use correct APIs and data types for UUID handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have two types for UUIDs depending on the byte ordering. Instead of explaining how bytes should go over the wire, use dedicated APIs and data types. This removes a confusion over the byte ordering. Signed-off-by: Andy Shevchenko Tested-By: Jó Ágila Bitsch Link: https://lore.kernel.org/r/20230125143425.85268-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/webusb.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/webusb.h b/include/linux/usb/webusb.h index b430d84357f3..fe43020b4a48 100644 --- a/include/linux/usb/webusb.h +++ b/include/linux/usb/webusb.h @@ -11,15 +11,12 @@ #include "uapi/linux/usb/ch9.h" /* - * little endian PlatformCapablityUUID for WebUSB + * Little Endian PlatformCapablityUUID for WebUSB * 3408b638-09a9-47a0-8bfd-a0768815b665 - * to identify Platform Device Capability descriptors as referring to WebUSB - * - * the UUID above MUST be sent over the wire as the byte sequence: - * {0x38, 0xB6, 0x08, 0x34, 0xA9, 0x09, 0xA0, 0x47, 0x8B, 0xFD, 0xA0, 0x76, 0x88, 0x15, 0xB6, 0x65}. + * to identify Platform Device Capability descriptors as referring to WebUSB. */ #define WEBUSB_UUID \ - UUID_INIT(0x38b60834, 0xa909, 0xa047, 0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65) + GUID_INIT(0x3408b638, 0x09a9, 0x47a0, 0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65) /* * WebUSB Platform Capability data -- cgit v1.2.3 From 40eaa8c0cbba4a27668c7e01373ccd5b38381e2f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 23 Jan 2023 15:53:54 +0900 Subject: soc: apple: rtkit: Add apple_rtkit_idle() function This is yet another low power mode, used by DCP. Reviewed-by: Eric Curtin Reviewed-by: Sven Peter Signed-off-by: Hector Martin --- include/linux/soc/apple/rtkit.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index c9cabb679cd1..a2446b45ed7f 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -104,6 +104,11 @@ int apple_rtkit_wake(struct apple_rtkit *rtk); */ int apple_rtkit_shutdown(struct apple_rtkit *rtk); +/* + * Put the co-processor into idle mode + */ +int apple_rtkit_idle(struct apple_rtkit *rtk); + /* * Checks if RTKit is running and ready to handle messages. */ -- cgit v1.2.3 From 4435d63f172851b57a6a0943b73a6b8055a9356f Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Sat, 21 Jan 2023 16:42:53 +0900 Subject: soc: apple: rtkit: Add a private pointer to apple_rtkit_shmem This allows downstream consumers to keep track of private data for shmem mappings. In particular, the Rust abstraction will use this to safely drop data associated with a mapping when it is unmapped. Signed-off-by: Asahi Lina Reviewed-by: Sven Peter Reviewed-by: Eric Curtin Signed-off-by: Hector Martin --- include/linux/soc/apple/rtkit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index a2446b45ed7f..927b214ef1c6 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -22,6 +22,7 @@ * @size: Size of the shared memory buffer. * @iova: Device VA of shared memory buffer. * @is_mapped: Shared memory buffer is managed by the co-processor. + * @private: Private data pointer for the parent driver. */ struct apple_rtkit_shmem { @@ -30,6 +31,7 @@ struct apple_rtkit_shmem { size_t size; dma_addr_t iova; bool is_mapped; + void *private; }; /* -- cgit v1.2.3 From b3892860f50920ea46342d32bf51323877365082 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Sat, 21 Jan 2023 16:41:35 +0900 Subject: soc: apple: rtkit: Export non-devm init/free functions While we normally encourage devm usage by drivers, some consumers (and in particular the upcoming Rust abstractions) might want to manually manage memory. Export the raw functions to make this possible. Signed-off-by: Asahi Lina Reviewed-by: Sven Peter Reviewed-by: Eric Curtin Signed-off-by: Hector Martin --- include/linux/soc/apple/rtkit.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index 927b214ef1c6..fc456f75c131 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -79,6 +79,25 @@ struct apple_rtkit *devm_apple_rtkit_init(struct device *dev, void *cookie, const char *mbox_name, int mbox_idx, const struct apple_rtkit_ops *ops); +/* + * Non-devm version of devm_apple_rtkit_init. Must be freed with + * apple_rtkit_free. + * + * @dev: Pointer to the device node this coprocessor is assocated with + * @cookie: opaque cookie passed to all functions defined in rtkit_ops + * @mbox_name: mailbox name used to communicate with the co-processor + * @mbox_idx: mailbox index to be used if mbox_name is NULL + * @ops: pointer to rtkit_ops to be used for this co-processor + */ +struct apple_rtkit *apple_rtkit_init(struct device *dev, void *cookie, + const char *mbox_name, int mbox_idx, + const struct apple_rtkit_ops *ops); + +/* + * Free an instance of apple_rtkit. + */ +void apple_rtkit_free(struct apple_rtkit *rtk); + /* * Reinitialize internal structures. Must only be called with the co-processor * is held in reset. -- cgit v1.2.3 From 2b8e35337605ca581f777cad73d7a6ac2ccbf6ec Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 30 Jan 2023 13:17:46 +0200 Subject: container_of: Update header inclusions The commit 848dba781f19 ("container_of: remove container_of_safe()") removed the code that uses err.h. Replace the inclusion by stddef.h which provides offsetof() definition which is still in use. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230130111746.59830-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/container_of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 1d898f9158b4..713890c867be 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -3,7 +3,7 @@ #define _LINUX_CONTAINER_OF_H #include -#include +#include #define typeof_member(T, m) typeof(((T*)0)->m) -- cgit v1.2.3 From 3bd3bc2ada84229e74f974d4dafcaca59ae8347f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 23 Jan 2023 16:49:49 +0000 Subject: soundwire: bus: Remove unused reset_page_addr() callback A previous patch removed unnecessary zeroing of the page registers after a paged transaction, so now the reset_page_addr callback is unused and can be removed. Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230123164949.245898-3-rf@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 3cd2a761911f..a8d74635ea0d 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -838,7 +838,6 @@ struct sdw_defer { * @override_adr: Override value read from firmware (quirk for buggy firmware) * @xfer_msg: Transfer message callback * @xfer_msg_defer: Defer version of transfer message callback - * @reset_page_addr: Reset the SCP page address registers * @set_bus_conf: Set the bus configuration * @pre_bank_switch: Callback for pre bank switch * @post_bank_switch: Callback for post bank switch @@ -854,8 +853,6 @@ struct sdw_master_ops { enum sdw_command_response (*xfer_msg_defer) (struct sdw_bus *bus, struct sdw_msg *msg, struct sdw_defer *defer); - enum sdw_command_response (*reset_page_addr) - (struct sdw_bus *bus, unsigned int dev_num); int (*set_bus_conf)(struct sdw_bus *bus, struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); -- cgit v1.2.3 From 43f1a7f905fcc796620c6488a7098068a05484ca Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 27 Jan 2023 16:51:04 +0000 Subject: soundwire: stream: Add specific prep/deprep commands to port_prep callback Currently, port_prep callback only has commands for PRE_PREP, PREP, and POST_PREP, which doesn't directly say whether this is for a prepare or deprepare call. Extend the command list enum to say whether the call is for prepare or deprepare aswell. Also remove SDW_OPS_PORT_PREP from sdw_port_prep_ops as this is unused, and update this enum to be simpler and more consistent with enum sdw_clk_stop_type. Note: Currently, the only users of SDW_OPS_PORT_POST_PREP are codec drivers sound/soc/codecs/wsa881x.c and sound/soc/codecs/wsa883x.c, both of which seem to assume that POST_PREP only occurs after a prepare, even though it would also have occurred after a deprepare. Since it doesn't make sense to mark the port prepared after a deprepare, changing the enum to separate PORT_DEPREP from PORT_PREP should make the check for PORT_PREP in those drivers be more logical. Signed-off-by: Stefan Binding Reviewed-by: Pierre-Louis Bossart Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20230127165111.3010960-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9e4537f409c2..7d9fbf9359e4 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -566,13 +566,15 @@ struct sdw_prepare_ch { * enum sdw_port_prep_ops: Prepare operations for Data Port * * @SDW_OPS_PORT_PRE_PREP: Pre prepare operation for the Port - * @SDW_OPS_PORT_PREP: Prepare operation for the Port + * @SDW_OPS_PORT_PRE_DEPREP: Pre deprepare operation for the Port * @SDW_OPS_PORT_POST_PREP: Post prepare operation for the Port + * @SDW_OPS_PORT_POST_DEPREP: Post deprepare operation for the Port */ enum sdw_port_prep_ops { SDW_OPS_PORT_PRE_PREP = 0, - SDW_OPS_PORT_PREP = 1, - SDW_OPS_PORT_POST_PREP = 2, + SDW_OPS_PORT_PRE_DEPREP, + SDW_OPS_PORT_POST_PREP, + SDW_OPS_PORT_POST_DEPREP, }; /** -- cgit v1.2.3 From 4e197ee880c24ecb63f7fe17449b3653bc64b03c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 31 Jan 2023 09:46:39 +0100 Subject: clk: imx6ul: add ethernet refclock mux support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ethernet refclock mux support and set it to internal clock by default. This configuration will not affect existing boards. clock tree before this patch: fec1 <- enet1_ref_125m (gate) <- enet1_ref (divider) <-, |- pll6_enet fec2 <- enet2_ref_125m (gate) <- enet2_ref (divider) <-´ after this patch: fec1 <- enet1_ref_sel(mux) <- enet1_ref_125m (gate) <- ... `--<> enet1_ref_pad |- pll6_enet fec2 <- enet2_ref_sel(mux) <- enet2_ref_125m (gate) <- ... `--<> enet2_ref_pad Signed-off-by: Oleksij Rempel Acked-by: Lee Jones Reviewed-by: Abel Vesa Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20230131084642.709385-17-o.rempel@pengutronix.de --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index d4b5e527a7a3..09c6b3184bb0 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -451,8 +451,10 @@ #define IMX6SX_GPR12_PCIE_RX_EQ_2 (0x2 << 0) /* For imx6ul iomux gpr register field define */ -#define IMX6UL_GPR1_ENET1_CLK_DIR (0x1 << 17) -#define IMX6UL_GPR1_ENET2_CLK_DIR (0x1 << 18) +#define IMX6UL_GPR1_ENET2_TX_CLK_DIR BIT(18) +#define IMX6UL_GPR1_ENET1_TX_CLK_DIR BIT(17) +#define IMX6UL_GPR1_ENET2_CLK_SEL BIT(14) +#define IMX6UL_GPR1_ENET1_CLK_SEL BIT(13) #define IMX6UL_GPR1_ENET1_CLK_OUTPUT (0x1 << 17) #define IMX6UL_GPR1_ENET2_CLK_OUTPUT (0x1 << 18) #define IMX6UL_GPR1_ENET_CLK_DIR (0x3 << 17) -- cgit v1.2.3 From cbdb1f163af2bb90d01be1f0263df1d8d5c9d9d3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Nov 2022 16:10:03 +0200 Subject: vdso/bits.h: Add BIT_ULL() for the sake of consistency The minimization done in 3945ff37d2f4 ("linux/bits.h: Extract common header for vDSO") was required to isolate the VDSO build from the larger kernel header impact. The split added some inconsistency since BIT() and BIT_ULL() are now defined in the different files which confuses unprepared reader. Move BIT_ULL() to vdso/bits.h. No functional change. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221128141003.77929-1-andriy.shevchenko@linux.intel.com --- include/linux/bits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bits.h b/include/linux/bits.h index 87d112650dfb..7c0cf5031abe 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -6,7 +6,6 @@ #include #include -#define BIT_ULL(nr) (ULL(1) << (nr)) #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) -- cgit v1.2.3 From 5a5d7e9badd2cb8065db171961bd30bd3595e4b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2023 16:08:31 +0100 Subject: cpuidle: lib/bug: Disable rcu_is_watching() during WARN/BUG In order to avoid WARN/BUG from generating nested or even recursive warnings, force rcu_is_watching() true during WARN/lockdep_rcu_suspicious(). Notably things like unwinding the stack can trigger rcu_dereference() warnings, which then triggers more unwinding which then triggers more warnings etc.. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230126151323.408156109@infradead.org --- include/linux/context_tracking.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index dcef4a9e4d63..d4afa8508a80 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -130,9 +130,36 @@ static __always_inline unsigned long ct_state_inc(int incby) return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state)); } +static __always_inline bool warn_rcu_enter(void) +{ + bool ret = false; + + /* + * Horrible hack to shut up recursive RCU isn't watching fail since + * lots of the actual reporting also relies on RCU. + */ + preempt_disable_notrace(); + if (rcu_dynticks_curr_cpu_in_eqs()) { + ret = true; + ct_state_inc(RCU_DYNTICKS_IDX); + } + + return ret; +} + +static __always_inline void warn_rcu_exit(bool rcu) +{ + if (rcu) + ct_state_inc(RCU_DYNTICKS_IDX); + preempt_enable_notrace(); +} + #else static inline void ct_idle_enter(void) { } static inline void ct_idle_exit(void) { } + +static __always_inline bool warn_rcu_enter(void) { return false; } +static __always_inline void warn_rcu_exit(bool rcu) { } #endif /* !CONFIG_CONTEXT_TRACKING_IDLE */ #endif -- cgit v1.2.3 From d099dbfd330686a8c09cd8944bcc77a56f9e7815 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2023 16:08:32 +0100 Subject: cpuidle: tracing: Warn about !rcu_is_watching() When using noinstr, WARN when tracing hits when RCU is disabled. Suggested-by: Steven Rostedt (Google) Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230126151323.466670589@infradead.org --- include/linux/trace_recursion.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index c303f7a114e9..d48cd92d2364 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -135,6 +135,21 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif +#ifdef CONFIG_ARCH_WANTS_NO_INSTR +# define trace_warn_on_no_rcu(ip) \ + ({ \ + bool __ret = !rcu_is_watching(); \ + if (__ret && !trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \ + trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \ + WARN_ONCE(true, "RCU not on for: %pS\n", (void *)ip); \ + trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \ + } \ + __ret; \ + }) +#else +# define trace_warn_on_no_rcu(ip) false +#endif + /* * Preemption is promised to be disabled when return bit >= 0. */ @@ -144,6 +159,9 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign unsigned int val = READ_ONCE(current->trace_recursion); int bit; + if (trace_warn_on_no_rcu(ip)) + return -1; + bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* -- cgit v1.2.3 From 8739c6811572b087decd561f96382087402cc343 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2023 16:08:36 +0100 Subject: sched/clock/x86: Mark sched_clock() noinstr In order to use sched_clock() from noinstr code, mark it and all it's implenentations noinstr. The whole pvclock thing (used by KVM/Xen) is a bit of a pain, since it calls out to watchdogs, create a pvclock_clocksource_read_nowd() variant doesn't do that and can be noinstr. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230126151323.702003578@infradead.org --- include/linux/math64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 8958f4c005c1..8b9191a2849e 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -161,7 +161,7 @@ static inline u64 mul_u32_u32(u32 a, u32 b) #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr -static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) +static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) { return (u64)(((unsigned __int128)a * mul) >> shift); } @@ -177,7 +177,7 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) #else #ifndef mul_u64_u32_shr -static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) +static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) { u32 ah, al; u64 ret; -- cgit v1.2.3 From 776f22913b8e50011004c6ae43004711dab7efa5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2023 16:08:37 +0100 Subject: sched/clock: Make local_clock() noinstr With sched_clock() noinstr, provide a noinstr implementation of local_clock(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230126151323.760767043@infradead.org --- include/linux/sched/clock.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 867d588314e0..ca008f7d3615 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -45,7 +45,7 @@ static inline u64 cpu_clock(int cpu) return sched_clock(); } -static inline u64 local_clock(void) +static __always_inline u64 local_clock(void) { return sched_clock(); } @@ -79,10 +79,8 @@ static inline u64 cpu_clock(int cpu) return sched_clock_cpu(cpu); } -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} +extern u64 local_clock(void); + #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING -- cgit v1.2.3 From dd0b9619a21ef77127e6002f9cb2d254c03ceed1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 19 Jan 2023 15:32:10 +0800 Subject: soundwire: cadence: use directly bus sdw_defer structure Copying the bus sdw_defer structure into the Cadence internals leads to using stale pointers and kernel oopses on errors. It's just simpler and safer to use the bus sdw_defer structure directly. Link: https://github.com/thesofproject/linux/issues/4056 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230119073211.85979-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index a8d74635ea0d..9e5fc0307284 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -851,8 +851,7 @@ struct sdw_master_ops { enum sdw_command_response (*xfer_msg) (struct sdw_bus *bus, struct sdw_msg *msg); enum sdw_command_response (*xfer_msg_defer) - (struct sdw_bus *bus, struct sdw_msg *msg, - struct sdw_defer *defer); + (struct sdw_bus *bus, struct sdw_msg *msg); int (*set_bus_conf)(struct sdw_bus *bus, struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); -- cgit v1.2.3 From 66f95de7c13be5e442d8ed4cf00e13f8dbdc1315 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 19 Jan 2023 15:32:11 +0800 Subject: soundwire: cadence: further simplify low-level xfer_msg_defer() callback The message pointer is already stored in the bus->defer structure, not need to pass it as an argument. Suggested-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230119073211.85979-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9e5fc0307284..d09a9857e1de 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -837,7 +837,8 @@ struct sdw_defer { * @read_prop: Read Master properties * @override_adr: Override value read from firmware (quirk for buggy firmware) * @xfer_msg: Transfer message callback - * @xfer_msg_defer: Defer version of transfer message callback + * @xfer_msg_defer: Defer version of transfer message callback. The message is handled with the + * bus struct @sdw_defer * @set_bus_conf: Set the bus configuration * @pre_bank_switch: Callback for pre bank switch * @post_bank_switch: Callback for post bank switch @@ -851,7 +852,7 @@ struct sdw_master_ops { enum sdw_command_response (*xfer_msg) (struct sdw_bus *bus, struct sdw_msg *msg); enum sdw_command_response (*xfer_msg_defer) - (struct sdw_bus *bus, struct sdw_msg *msg); + (struct sdw_bus *bus); int (*set_bus_conf)(struct sdw_bus *bus, struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); -- cgit v1.2.3 From 5e6a51787fef20b849682d8c49ec9c2beed5c373 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Jan 2023 15:38:38 +0200 Subject: uuid: Decouple guid_t and uuid_le types and respective macros The guid_t type and respective macros are being used internally only. The uuid_le has its user outside the kernel. Decouple these types and macros, and make guid_t completely internal type to the kernel. Signed-off-by: Andy Shevchenko Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230124133838.22645-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uuid.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 8cdc0d3567cd..5be158a49e11 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -8,15 +8,25 @@ #ifndef _LINUX_UUID_H_ #define _LINUX_UUID_H_ -#include #include #define UUID_SIZE 16 +typedef struct { + __u8 b[UUID_SIZE]; +} guid_t; + typedef struct { __u8 b[UUID_SIZE]; } uuid_t; +#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((guid_t) \ +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + #define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ ((uuid_t) \ {{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ @@ -97,10 +107,12 @@ extern const u8 uuid_index[16]; int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); -/* backwards compatibility, don't use in new code */ -static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) +/* MEI UUID type, don't use anywhere else */ +#include + +static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) { - return memcmp(&u1, &u2, sizeof(guid_t)); + return memcmp(&u1, &u2, sizeof(uuid_le)); } #endif -- cgit v1.2.3 From d931b83e62b1dd352fc326c0b1cf3be3ef19e113 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Tue, 24 Jan 2023 16:40:47 +0100 Subject: cacheinfo: Make default acpi_get_cache_info() return an error commit bd500361a937 ("ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info()") updates the prototype of acpi_get_cache_info(). The cache 'levels' is update through a pointer and not the return value of the function. If CONFIG_ACPI_PPTT is not defined, acpi_get_cache_info() doesn't update its *levels and *split_levels parameters and returns 0. This can lead to a faulty behaviour. Make acpi_get_cache_info() return an error code if CONFIG_ACPI_PPTT is not defined. Also, In init_cache_level(), if no PPTT is present or CONFIG_ACPI_PPTT is not defined, instead of aborting if acpi_get_cache_info() returns an error code, just continue. This allows to try fetching the cache information from clidr_el1. Signed-off-by: Pierre Gondois Link: https://lore.kernel.org/r/20230124154053.355376-3-pierre.gondois@arm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cacheinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index dfef57077cd0..908e19d17f49 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -100,7 +100,7 @@ static inline int acpi_get_cache_info(unsigned int cpu, unsigned int *levels, unsigned int *split_levels) { - return 0; + return -ENOENT; } #else int acpi_get_cache_info(unsigned int cpu, -- cgit v1.2.3 From 2d97930d74b12467fd5f48d8560e48c1cf5edcb1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 31 Jan 2023 05:01:32 +0000 Subject: block: Remove mm.h from bvec.h This was originally added for the definition of nth_page(), but we no longer use nth_page() in this header, so we can drop the heavyweight mm.h now. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20230131050132.2627124-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bvec.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 35c25dff651a..7939b345ee7f 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -12,7 +12,6 @@ #include #include #include -#include #include struct page; -- cgit v1.2.3 From 4971c268b85e1c7a734a61622fc0813c86e2362e Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Tue, 31 Jan 2023 18:42:43 +0100 Subject: ima: Align ima_file_mmap() parameters with mmap_file LSM hook Commit 98de59bfe4b2f ("take calculation of final prot in security_mmap_file() into a helper") moved the code to update prot, to be the actual protections applied to the kernel, to a new helper called mmap_prot(). However, while without the helper ima_file_mmap() was getting the updated prot, with the helper ima_file_mmap() gets the original prot, which contains the protections requested by the application. A possible consequence of this change is that, if an application calls mmap() with only PROT_READ, and the kernel applies PROT_EXEC in addition, that application would have access to executable memory without having this event recorded in the IMA measurement list. This situation would occur for example if the application, before mmap(), calls the personality() system call with READ_IMPLIES_EXEC as the first argument. Align ima_file_mmap() parameters with those of the mmap_file LSM hook, so that IMA can receive both the requested prot and the final prot. Since the requested protections are stored in a new variable, and the final protections are stored in the existing variable, this effectively restores the original behavior of the MMAP_CHECK hook. Cc: stable@vger.kernel.org Fixes: 98de59bfe4b2 ("take calculation of final prot in security_mmap_file() into a helper") Signed-off-by: Roberto Sassu Reviewed-by: Stefan Berger Signed-off-by: Mimi Zohar --- include/linux/ima.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 5a0b2a285a18..d79fee67235e 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -21,7 +21,8 @@ extern int ima_file_check(struct file *file, int mask); extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns, struct inode *inode); extern void ima_file_free(struct file *file); -extern int ima_file_mmap(struct file *file, unsigned long prot); +extern int ima_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); extern int ima_load_data(enum kernel_load_data_id id, bool contents); extern int ima_post_load_data(char *buf, loff_t size, @@ -76,7 +77,8 @@ static inline void ima_file_free(struct file *file) return; } -static inline int ima_file_mmap(struct file *file, unsigned long prot) +static inline int ima_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { return 0; } -- cgit v1.2.3 From 4b1936cd081496865151eaab539f767240952306 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 26 Jan 2023 20:55:45 +0000 Subject: platform/chrome: cros_ec: Add VDM attention headers Incorporate updates to the EC headers to support the retrieval of VDM Attention messages from port partners. These headers are already present in the ChromeOS EC codebase. [1] [1] https://source.chromium.org/chromium/chromiumos/platform/ec/+/main:include/ec_commands.h Signed-off-by: Prashant Malani [pmalani: Removed extra tab in header #define] Reviewed-by: Benson Leung Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20230126205620.3714994-1-pmalani@chromium.org --- include/linux/platform_data/cros_ec_commands.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index b9c4a3964247..b3b3df163efc 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5862,6 +5862,7 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_MUX_1_SET_DONE BIT(5) #define PD_STATUS_EVENT_VDM_REQ_REPLY BIT(6) #define PD_STATUS_EVENT_VDM_REQ_FAILED BIT(7) +#define PD_STATUS_EVENT_VDM_ATTENTION BIT(8) struct ec_params_typec_status { uint8_t port; @@ -5906,7 +5907,8 @@ struct ec_response_typec_status { } __ec_align1; /* - * Gather the response to the most recent VDM REQ from the AP + * Gather the response to the most recent VDM REQ from the AP, as well + * as popping the oldest VDM:Attention from the DPM queue */ #define EC_CMD_TYPEC_VDM_RESPONSE 0x013C @@ -5919,10 +5921,18 @@ struct ec_response_typec_vdm_response { uint8_t vdm_data_objects; /* Partner to address - see enum typec_partner_type */ uint8_t partner_type; - /* Reserved */ - uint16_t reserved; + /* enum ec_status describing VDM response */ + uint16_t vdm_response_err; /* VDM data, including VDM header */ uint32_t vdm_response[VDO_MAX_SIZE]; + /* Number of 32-bit Attention fields filled in */ + uint8_t vdm_attention_objects; + /* Number of remaining messages to consume */ + uint8_t vdm_attention_left; + /* Reserved */ + uint16_t reserved1; + /* VDM:Attention contents */ + uint32_t vdm_attention[2]; } __ec_align1; #undef VDO_MAX_SIZE -- cgit v1.2.3 From c1085957dece02bda586cbffc1328f3bca09325f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 12 Jan 2023 21:34:43 +0800 Subject: f2fs: clarify compress level bit offset commit 3fde13f817e2 ("f2fs: compress: support compress level") introduce compress level, which macro(COMPRESS_LEVEL_OFFSET) is 8, But use wrong comment about compress level. Let's fix it. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ee0d75d9a302..1701f25117ea 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -315,7 +315,7 @@ struct f2fs_inode { __u8 i_log_cluster_size; /* log of cluster size */ __le16 i_compress_flag; /* compress flag */ /* 0 bit: chksum flag - * [10,15] bits: compress level + * [8,15] bits: compress level */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; -- cgit v1.2.3 From 4f64a6c9f6f11e8b7314f8e27e2c4568706009e6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 27 Jan 2023 14:31:41 +0000 Subject: perf: Fix perf_event_pmu_context serialization Syzkaller triggered a WARN in put_pmu_ctx(). WARNING: CPU: 1 PID: 2245 at kernel/events/core.c:4925 put_pmu_ctx+0x1f0/0x278 This is because there is no locking around the access of "if (!epc->ctx)" in find_get_pmu_context() and when it is set to NULL in put_pmu_ctx(). The decrement of the reference count in put_pmu_ctx() also happens outside of the spinlock, leading to the possibility of this order of events, and the context being cleared in put_pmu_ctx(), after its refcount is non zero: CPU0 CPU1 find_get_pmu_context() if (!epc->ctx) == false put_pmu_ctx() atomic_dec_and_test(&epc->refcount) == true epc->refcount == 0 atomic_inc(&epc->refcount); epc->refcount == 1 list_del_init(&epc->pmu_ctx_entry); epc->ctx = NULL; Another issue is that WARN_ON for no active PMU events in put_pmu_ctx() is outside of the lock. If the perf_event_pmu_context is an embedded one, even after clearing it, it won't be deleted and can be re-used. So the warning can trigger. For this reason it also needs to be moved inside the lock. The above warning is very quick to trigger on Arm by running these two commands at the same time: while true; do perf record -- ls; done while true; do perf record -- ls; done [peterz: atomic_dec_and_raw_lock*()] Fixes: bd2756811766 ("perf: Rewrite core context handling") Reported-by: syzbot+697196bc0265049822bd@syzkaller.appspotmail.com Signed-off-by: James Clark Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ravi Bangoria Link: https://lore.kernel.org/r/20230127143141.1782804-2-james.clark@arm.com --- include/linux/spinlock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1341f7d62da4..be48f1cb1878 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -476,6 +476,15 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) +extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock); +#define atomic_dec_and_raw_lock(atomic, lock) \ + __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock)) + +extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, + unsigned long *flags); +#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \ + __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags))) + int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, size_t max_size, unsigned int cpu_mult, gfp_t gfp, const char *name, -- cgit v1.2.3 From 55ab834a86a9934c4f17825c115f7dc16a89aae7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 16 Dec 2022 10:46:33 +0100 Subject: Revert "mm: add nodes= arg to memory.reclaim" This reverts commit 12a5d3955227b0d7e04fb793ccceeb2a1dd275c5. Although it is recognized that a finer grained pro-active reclaim is something we need and want the semantic of this implementation is really ambiguous. In a follow up discussion it became clear that there are two essential usecases here. One is to use memory.reclaim to pro-actively reclaim memory and expectation is that the requested and reported amount of memory is uncharged from the memcg. Another usecase focuses on pro-active demotion when the memory is merely shuffled around to demotion targets while the overall charged memory stays unchanged. The current implementation considers demoted pages as reclaimed and that break both usecases. [1] has tried to address the reporting part but there are more issues with that summarized in [2] and follow up emails. Let's revert the nodemask based extension of the memcg pro-active reclaim for now until we settle with a more robust semantic. [1] http://lkml.kernel.org/r/http://lkml.kernel.org/r/20221206023406.3182800-1-almasrymina@google.com [2] http://lkml.kernel.org/r/Y5bsmpCyeryu3Zz1@dhcp22.suse.cz Link: https://lkml.kernel.org/r/Y5xASNe1x8cusiTx@dhcp22.suse.cz Fixes: 12a5d3955227b0d ("mm: add nodes= arg to memory.reclaim") Signed-off-by: Michal Hocko Cc: Bagas Sanjaya Cc: Huang Ying Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Mina Almasry Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Cc: Wei Xu Cc: Yang Shi Cc: Yosry Ahmed Cc: zefan li Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2787b84eaf12..0ceed49516ad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -418,8 +418,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options, - nodemask_t *nodemask); + unsigned int reclaim_options); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, -- cgit v1.2.3 From 3489dbb696d25602aea8c3e669a6d43b76bd5358 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 26 Jan 2023 14:27:20 -0800 Subject: mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps Patch series "Fixes for hugetlb mapcount at most 1 for shared PMDs". This issue of mapcount in hugetlb pages referenced by shared PMDs was discussed in [1]. The following two patches address user visible behavior caused by this issue. [1] https://lore.kernel.org/linux-mm/Y9BF+OCdWnCSilEu@monkey/ This patch (of 2): A hugetlb page will have a mapcount of 1 if mapped by multiple processes via a shared PMD. This is because only the first process increases the map count, and subsequent processes just add the shared PMD page to their page table. page_mapcount is being used to decide if a hugetlb page is shared or private in /proc/PID/smaps. Pages referenced via a shared PMD were incorrectly being counted as private. To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found count the hugetlb page as shared. A new helper to check for a shared PMD is added. [akpm@linux-foundation.org: simplification, per David] [akpm@linux-foundation.org: hugetlb.h: include page_ref.h for page_count()] Link: https://lkml.kernel.org/r/20230126222721.222195-2-mike.kravetz@oracle.com Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps") Signed-off-by: Mike Kravetz Acked-by: Peter Xu Cc: David Hildenbrand Cc: James Houghton Cc: Matthew Wilcox Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Vishal Moola (Oracle) Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 551834cd5299..db194e2ba69f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1187,6 +1188,18 @@ static inline __init void hugetlb_cma_reserve(int order) } #endif +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return page_count(virt_to_page(pte)) > 1; +} +#else +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return false; +} +#endif + bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE -- cgit v1.2.3 From 88d7b12068b95731c280af8ce88e8ee9561f96de Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 Jan 2023 20:07:27 +0000 Subject: highmem: round down the address passed to kunmap_flush_on_unmap() We already round down the address in kunmap_local_indexed() which is the other implementation of __kunmap_local(). The only implementation of kunmap_flush_on_unmap() is PA-RISC which is expecting a page-aligned address. This may be causing PA-RISC to be flushing the wrong addresses currently. Link: https://lkml.kernel.org/r/20230126200727.1680362-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Fixes: 298fa1ad5571 ("highmem: Provide generic variant of kmap_atomic*") Reviewed-by: Ira Weiny Cc: "Fabio M. De Francesco" Cc: Al Viro Cc: Thomas Gleixner Cc: Helge Deller Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Bagas Sanjaya Cc: David Sterba Cc: Kees Cook Cc: Sebastian Andrzej Siewior Cc: Tony Luck Cc: Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 034b1106d022..e098f38422af 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) static inline void __kunmap_local(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif } @@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) static inline void __kunmap_atomic(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif pagefault_enable(); if (IS_ENABLED(CONFIG_PREEMPT_RT)) -- cgit v1.2.3 From ac86f547ca1002aec2ef66b9e64d03f45bbbfbb9 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sun, 29 Jan 2023 12:09:45 +0800 Subject: mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath() As commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"), hwpoison will forcibly uncharg a LRU hwpoisoned page, the folio_memcg could be NULl, then, mem_cgroup_track_foreign_dirty_slowpath() could occurs a NULL pointer dereference, let's do not record the foreign writebacks for folio memcg is null in mem_cgroup_track_foreign_dirty() to fix it. Link: https://lkml.kernel.org/r/20230129040945.180629-1-wangkefeng.wang@huawei.com Fixes: 97b27821b485 ("writeback, memcg: Implement foreign dirty flushing") Signed-off-by: Kefeng Wang Reported-by: Ma Wupeng Tested-by: Miko Larsson Acked-by: Michal Hocko Cc: Jan Kara Cc: Jens Axboe Cc: Kefeng Wang Cc: Ma Wupeng Cc: Naoya Horiguchi Cc: Shakeel Butt Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d3c8203cab6c..85dc9b88ea37 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { + struct mem_cgroup *memcg; + if (mem_cgroup_disabled()) return; - if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + memcg = folio_memcg(folio); + if (unlikely(memcg && &memcg->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } -- cgit v1.2.3 From 303feb3cc06ac0665d0ee9c1414941200e60e8a3 Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Fri, 20 Jan 2023 00:23:30 +0530 Subject: spi: Add APIs in spi core to set/get spi->chip_select and spi->cs_gpiod Supporting multi-cs in spi core and spi controller drivers would require the chip_select & cs_gpiod members of struct spi_device to be an array. But changing the type of these members to array would break the spi driver functionality. To make the transition smoother introduced four new APIs to get/set the spi->chip_select & spi->cs_gpiod and replaced all spi->chip_select and spi->cs_gpiod references in spi core with the API calls. While adding multi-cs support in further patches the chip_select & cs_gpiod members of the spi_device structure would be converted to arrays & the "idx" parameter of the APIs would be used as array index i.e., spi->chip_select[idx] & spi->cs_gpiod[idx] respectively. Suggested-by: Lars-Peter Clausen Signed-off-by: Amit Kumar Mahapatra Reviewed-by: Michal Simek Link: https://lore.kernel.org/r/20230119185342.2093323-2-amit.kumar-mahapatra@amd.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 9a32495fbb1f..9b23a1d0dd0d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -263,6 +263,26 @@ static inline void *spi_get_drvdata(struct spi_device *spi) return dev_get_drvdata(&spi->dev); } +static inline u8 spi_get_chipselect(struct spi_device *spi, u8 idx) +{ + return spi->chip_select; +} + +static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect) +{ + spi->chip_select = chipselect; +} + +static inline struct gpio_desc *spi_get_csgpiod(struct spi_device *spi, u8 idx) +{ + return spi->cs_gpiod; +} + +static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod) +{ + spi->cs_gpiod = csgpiod; +} + struct spi_message; /** -- cgit v1.2.3 From 82d40986a6a34a83f9d4df35241ff109e9468c48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 14:32:54 +0200 Subject: input: remove pxa930_trkball driver The pxa930 SoC support is getting removed, and no upstream board ever provided the trkball device that this driver relies on. Cc: Dmitry Torokhov Cc: Arnd Bergmann Cc: linux-input@vger.kernel.org Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/platform_data/mouse-pxa930_trkball.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 include/linux/platform_data/mouse-pxa930_trkball.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mouse-pxa930_trkball.h b/include/linux/platform_data/mouse-pxa930_trkball.h deleted file mode 100644 index ba0ac7a30d8c..000000000000 --- a/include/linux/platform_data/mouse-pxa930_trkball.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_PXA930_TRKBALL_H -#define __ASM_ARCH_PXA930_TRKBALL_H - -struct pxa930_trkball_platform_data { - int x_filter; - int y_filter; -}; - -#endif /* __ASM_ARCH_PXA930_TRKBALL_H */ - -- cgit v1.2.3 From 119df5ee5b56392070936199857de4ddaabf0b89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Sep 2022 14:35:42 +0200 Subject: input: remove pxa930_rotary keyboard driver The pxa930 platform is getting removed and no upstream machine ever defined a rotary keyboard device. Cc: Dmitry Torokhov Cc: linux-input@vger.kernel.org Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- .../linux/platform_data/keyboard-pxa930_rotary.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/platform_data/keyboard-pxa930_rotary.h (limited to 'include/linux') diff --git a/include/linux/platform_data/keyboard-pxa930_rotary.h b/include/linux/platform_data/keyboard-pxa930_rotary.h deleted file mode 100644 index 3271aa01cbe8..000000000000 --- a/include/linux/platform_data/keyboard-pxa930_rotary.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_PXA930_ROTARY_H -#define __ASM_ARCH_PXA930_ROTARY_H - -/* NOTE: - * - * rotary can be either interpreted as a ralative input event (e.g. - * REL_WHEEL or REL_HWHEEL) or a specific key event (e.g. UP/DOWN - * or LEFT/RIGHT), depending on if up_key & down_key are assigned - * or rel_code is assigned a non-zero value. When all are non-zero, - * up_key and down_key will be preferred. - */ -struct pxa930_rotary_platform_data { - int up_key; - int down_key; - int rel_code; -}; - -void __init pxa930_set_rotarykey_info(struct pxa930_rotary_platform_data *info); - -#endif /* __ASM_ARCH_PXA930_ROTARY_H */ -- cgit v1.2.3 From b401d1fd805379344750dffb0e3938676a047a2a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 Sep 2022 16:27:43 +0200 Subject: ASoC: pxa: remove unused board support Most PXA/MMP boards were removed, so the board specific ASoC support is no longer needed, leaving only support for DT based ones, as well as the "gumstix" and "spitz" machines that may get converted to DT later. Cc: Ian Molton Cc: Ken McGuire Cc: Marek Vasut Cc: Mike Rapoport Cc: Liam Girdwood Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: alsa-devel@alsa-project.org Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/platform_data/asoc-palm27x.h | 9 --------- include/linux/platform_data/asoc-poodle.h | 16 ---------------- include/linux/platform_data/mmp_audio.h | 18 ------------------ 3 files changed, 43 deletions(-) delete mode 100644 include/linux/platform_data/asoc-palm27x.h delete mode 100644 include/linux/platform_data/asoc-poodle.h delete mode 100644 include/linux/platform_data/mmp_audio.h (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-palm27x.h b/include/linux/platform_data/asoc-palm27x.h deleted file mode 100644 index 22b69a393a57..000000000000 --- a/include/linux/platform_data/asoc-palm27x.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _INCLUDE_PALMASOC_H_ -#define _INCLUDE_PALMASOC_H_ - -struct palm27x_asoc_info { - int jack_gpio; -}; - -#endif diff --git a/include/linux/platform_data/asoc-poodle.h b/include/linux/platform_data/asoc-poodle.h deleted file mode 100644 index 2052fad55c5c..000000000000 --- a/include/linux/platform_data/asoc-poodle.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_PLATFORM_DATA_POODLE_AUDIO -#define __LINUX_PLATFORM_DATA_POODLE_AUDIO - -/* locomo is not a proper gpio driver, and uses its own api */ -struct poodle_audio_platform_data { - struct device *locomo_dev; - - int gpio_amp_on; - int gpio_mute_l; - int gpio_mute_r; - int gpio_232vcc_on; - int gpio_jk_b; -}; - -#endif diff --git a/include/linux/platform_data/mmp_audio.h b/include/linux/platform_data/mmp_audio.h deleted file mode 100644 index 83428d8ee18d..000000000000 --- a/include/linux/platform_data/mmp_audio.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * MMP Platform AUDIO Management - * - * Copyright (c) 2011 Marvell Semiconductors Inc. - */ - -#ifndef MMP_AUDIO_H -#define MMP_AUDIO_H - -struct mmp_audio_platdata { - u32 period_max_capture; - u32 buffer_max_capture; - u32 period_max_playback; - u32 buffer_max_playback; -}; - -#endif /* MMP_AUDIO_H */ -- cgit v1.2.3 From 0f9b85edcae8b3a4511222592acec6697c018b90 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Oct 2022 17:52:11 +0200 Subject: power: remove pda_power supply driver This driver was used for a couple of Intel PXA and Samsung S3C24xx based PDAs, but all of those are now removed from the kernel, so the driver itself is no longer useful. Cc: Anton Vorontsov Cc: linux-pm@vger.kernel.org Acked-by: Sebastian Reichel Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/pda_power.h | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 include/linux/pda_power.h (limited to 'include/linux') diff --git a/include/linux/pda_power.h b/include/linux/pda_power.h deleted file mode 100644 index 2a69db4b60b7..000000000000 --- a/include/linux/pda_power.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Common power driver for PDAs and phones with one or two external - * power supplies (AC/USB) connected to main and backup batteries, - * and optional builtin charger. - * - * Copyright © 2007 Anton Vorontsov - */ - -#ifndef __PDA_POWER_H__ -#define __PDA_POWER_H__ - -#define PDA_POWER_CHARGE_AC (1 << 0) -#define PDA_POWER_CHARGE_USB (1 << 1) - -struct device; - -struct pda_power_pdata { - int (*init)(struct device *dev); - int (*is_ac_online)(void); - int (*is_usb_online)(void); - void (*set_charge)(int flags); - void (*exit)(struct device *dev); - int (*suspend)(pm_message_t state); - int (*resume)(void); - - char **supplied_to; - size_t num_supplicants; - - unsigned int wait_for_status; /* msecs, default is 500 */ - unsigned int wait_for_charger; /* msecs, default is 500 */ - unsigned int polling_interval; /* msecs, default is 2000 */ - - unsigned long ac_max_uA; /* current to draw when on AC */ - - bool use_otg_notifier; -}; - -#endif /* __PDA_POWER_H__ */ -- cgit v1.2.3 From 6388bbad4a26a765cdd310478ef077fbf7e6ea35 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Oct 2022 18:42:54 +0200 Subject: rtc: remove v3020 driver The v3020 RTC driver was exclusively used by the now removed cm-x300.c machine. Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: linux-kernel@vger.kernel.org Cc: linux-rtc@vger.kernel.org Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/platform_data/rtc-v3020.h | 41 --------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 include/linux/platform_data/rtc-v3020.h (limited to 'include/linux') diff --git a/include/linux/platform_data/rtc-v3020.h b/include/linux/platform_data/rtc-v3020.h deleted file mode 100644 index e55d82cebf80..000000000000 --- a/include/linux/platform_data/rtc-v3020.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * v3020.h - Registers definition and platform data structure for the v3020 RTC. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2006, 8D Technologies inc. - */ -#ifndef __LINUX_V3020_H -#define __LINUX_V3020_H - -/* The v3020 has only one data pin but which one - * is used depends on the board. */ -struct v3020_platform_data { - int leftshift; /* (1<<(leftshift)) & readl() */ - - unsigned int use_gpio:1; - unsigned int gpio_cs; - unsigned int gpio_wr; - unsigned int gpio_rd; - unsigned int gpio_io; -}; - -#define V3020_STATUS_0 0x00 -#define V3020_STATUS_1 0x01 -#define V3020_SECONDS 0x02 -#define V3020_MINUTES 0x03 -#define V3020_HOURS 0x04 -#define V3020_MONTH_DAY 0x05 -#define V3020_MONTH 0x06 -#define V3020_YEAR 0x07 -#define V3020_WEEK_DAY 0x08 -#define V3020_WEEK 0x09 - -#define V3020_IS_COMMAND(val) ((val)>=0x0E) - -#define V3020_CMD_RAM2CLOCK 0x0E -#define V3020_CMD_CLOCK2RAM 0x0F - -#endif /* __LINUX_V3020_H */ -- cgit v1.2.3 From 8971bb812e3c14aa730e751ddf2d90c32e9dc519 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Oct 2022 22:53:30 +0200 Subject: mfd: remove toshiba tmio drivers Four separate mfd drivers are in the "tmio" family, and all of them were used in now-removed PXA machines (eseries, tosa, and hx4700), so the mfd drivers and all its children can be removed as well. Cc: Lee Jones Cc: Wolfram Sang Cc: linux-kernel@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-renesas-soc@vger.kernel.org Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/mfd/asic3.h | 313 ------------------------------------------- include/linux/mfd/t7l66xb.h | 29 ---- include/linux/mfd/tc6387xb.h | 19 --- include/linux/mfd/tc6393xb.h | 53 -------- include/linux/mfd/tmio.h | 5 - 5 files changed, 419 deletions(-) delete mode 100644 include/linux/mfd/asic3.h delete mode 100644 include/linux/mfd/t7l66xb.h delete mode 100644 include/linux/mfd/tc6387xb.h delete mode 100644 include/linux/mfd/tc6393xb.h (limited to 'include/linux') diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h deleted file mode 100644 index 61e686dbaa74..000000000000 --- a/include/linux/mfd/asic3.h +++ /dev/null @@ -1,313 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * include/linux/mfd/asic3.h - * - * Compaq ASIC3 headers. - * - * Copyright 2001 Compaq Computer Corporation. - * Copyright 2007-2008 OpenedHand Ltd. - */ - -#ifndef __ASIC3_H__ -#define __ASIC3_H__ - -#include - -struct led_classdev; -struct asic3_led { - const char *name; - const char *default_trigger; - struct led_classdev *cdev; -}; - -struct asic3_platform_data { - u16 *gpio_config; - unsigned int gpio_config_num; - - unsigned int irq_base; - - unsigned int gpio_base; - - unsigned int clock_rate; - - struct asic3_led *leds; -}; - -#define ASIC3_NUM_GPIO_BANKS 4 -#define ASIC3_GPIOS_PER_BANK 16 -#define ASIC3_NUM_GPIOS 64 -#define ASIC3_NR_IRQS ASIC3_NUM_GPIOS + 6 - -#define ASIC3_IRQ_LED0 64 -#define ASIC3_IRQ_LED1 65 -#define ASIC3_IRQ_LED2 66 -#define ASIC3_IRQ_SPI 67 -#define ASIC3_IRQ_SMBUS 68 -#define ASIC3_IRQ_OWM 69 - -#define ASIC3_TO_GPIO(gpio) (NR_BUILTIN_GPIO + (gpio)) - -#define ASIC3_GPIO_BANK_A 0 -#define ASIC3_GPIO_BANK_B 1 -#define ASIC3_GPIO_BANK_C 2 -#define ASIC3_GPIO_BANK_D 3 - -#define ASIC3_GPIO(bank, gpio) \ - ((ASIC3_GPIOS_PER_BANK * ASIC3_GPIO_BANK_##bank) + (gpio)) -#define ASIC3_GPIO_bit(gpio) (1 << (gpio & 0xf)) -/* All offsets below are specified with this address bus shift */ -#define ASIC3_DEFAULT_ADDR_SHIFT 2 - -#define ASIC3_OFFSET(base, reg) (ASIC3_##base##_BASE + ASIC3_##base##_##reg) -#define ASIC3_GPIO_OFFSET(base, reg) \ - (ASIC3_GPIO_##base##_BASE + ASIC3_GPIO_##reg) - -#define ASIC3_GPIO_A_BASE 0x0000 -#define ASIC3_GPIO_B_BASE 0x0100 -#define ASIC3_GPIO_C_BASE 0x0200 -#define ASIC3_GPIO_D_BASE 0x0300 - -#define ASIC3_GPIO_TO_BANK(gpio) ((gpio) >> 4) -#define ASIC3_GPIO_TO_BIT(gpio) ((gpio) - \ - (ASIC3_GPIOS_PER_BANK * ((gpio) >> 4))) -#define ASIC3_GPIO_TO_MASK(gpio) (1 << ASIC3_GPIO_TO_BIT(gpio)) -#define ASIC3_GPIO_TO_BASE(gpio) (ASIC3_GPIO_A_BASE + (((gpio) >> 4) * 0x0100)) -#define ASIC3_BANK_TO_BASE(bank) (ASIC3_GPIO_A_BASE + ((bank) * 0x100)) - -#define ASIC3_GPIO_MASK 0x00 /* R/W 0:don't mask */ -#define ASIC3_GPIO_DIRECTION 0x04 /* R/W 0:input */ -#define ASIC3_GPIO_OUT 0x08 /* R/W 0:output low */ -#define ASIC3_GPIO_TRIGGER_TYPE 0x0c /* R/W 0:level */ -#define ASIC3_GPIO_EDGE_TRIGGER 0x10 /* R/W 0:falling */ -#define ASIC3_GPIO_LEVEL_TRIGGER 0x14 /* R/W 0:low level detect */ -#define ASIC3_GPIO_SLEEP_MASK 0x18 /* R/W 0:don't mask in sleep mode */ -#define ASIC3_GPIO_SLEEP_OUT 0x1c /* R/W level 0:low in sleep mode */ -#define ASIC3_GPIO_BAT_FAULT_OUT 0x20 /* R/W level 0:low in batt_fault */ -#define ASIC3_GPIO_INT_STATUS 0x24 /* R/W 0:none, 1:detect */ -#define ASIC3_GPIO_ALT_FUNCTION 0x28 /* R/W 1:LED register control */ -#define ASIC3_GPIO_SLEEP_CONF 0x2c /* - * R/W bit 1: autosleep - * 0: disable gposlpout in normal mode, - * enable gposlpout in sleep mode. - */ -#define ASIC3_GPIO_STATUS 0x30 /* R Pin status */ - -/* - * ASIC3 GPIO config - * - * Bits 0..6 gpio number - * Bits 7..13 Alternate function - * Bit 14 Direction - * Bit 15 Initial value - * - */ -#define ASIC3_CONFIG_GPIO_PIN(config) ((config) & 0x7f) -#define ASIC3_CONFIG_GPIO_ALT(config) (((config) & (0x7f << 7)) >> 7) -#define ASIC3_CONFIG_GPIO_DIR(config) ((config & (1 << 14)) >> 14) -#define ASIC3_CONFIG_GPIO_INIT(config) ((config & (1 << 15)) >> 15) -#define ASIC3_CONFIG_GPIO(gpio, alt, dir, init) (((gpio) & 0x7f) \ - | (((alt) & 0x7f) << 7) | (((dir) & 0x1) << 14) \ - | (((init) & 0x1) << 15)) -#define ASIC3_CONFIG_GPIO_DEFAULT(gpio, dir, init) \ - ASIC3_CONFIG_GPIO((gpio), 0, (dir), (init)) -#define ASIC3_CONFIG_GPIO_DEFAULT_OUT(gpio, init) \ - ASIC3_CONFIG_GPIO((gpio), 0, 1, (init)) - -/* - * Alternate functions - */ -#define ASIC3_GPIOA11_PWM0 ASIC3_CONFIG_GPIO(11, 1, 1, 0) -#define ASIC3_GPIOA12_PWM1 ASIC3_CONFIG_GPIO(12, 1, 1, 0) -#define ASIC3_GPIOA15_CONTROL_CX ASIC3_CONFIG_GPIO(15, 1, 1, 0) -#define ASIC3_GPIOC0_LED0 ASIC3_CONFIG_GPIO(32, 1, 0, 0) -#define ASIC3_GPIOC1_LED1 ASIC3_CONFIG_GPIO(33, 1, 0, 0) -#define ASIC3_GPIOC2_LED2 ASIC3_CONFIG_GPIO(34, 1, 0, 0) -#define ASIC3_GPIOC3_SPI_RXD ASIC3_CONFIG_GPIO(35, 1, 0, 0) -#define ASIC3_GPIOC4_CF_nCD ASIC3_CONFIG_GPIO(36, 1, 0, 0) -#define ASIC3_GPIOC4_SPI_TXD ASIC3_CONFIG_GPIO(36, 1, 1, 0) -#define ASIC3_GPIOC5_SPI_CLK ASIC3_CONFIG_GPIO(37, 1, 1, 0) -#define ASIC3_GPIOC5_nCIOW ASIC3_CONFIG_GPIO(37, 1, 1, 0) -#define ASIC3_GPIOC6_nCIOR ASIC3_CONFIG_GPIO(38, 1, 1, 0) -#define ASIC3_GPIOC7_nPCE_1 ASIC3_CONFIG_GPIO(39, 1, 0, 0) -#define ASIC3_GPIOC8_nPCE_2 ASIC3_CONFIG_GPIO(40, 1, 0, 0) -#define ASIC3_GPIOC9_nPOE ASIC3_CONFIG_GPIO(41, 1, 0, 0) -#define ASIC3_GPIOC10_nPWE ASIC3_CONFIG_GPIO(42, 1, 0, 0) -#define ASIC3_GPIOC11_PSKTSEL ASIC3_CONFIG_GPIO(43, 1, 0, 0) -#define ASIC3_GPIOC12_nPREG ASIC3_CONFIG_GPIO(44, 1, 0, 0) -#define ASIC3_GPIOC13_nPWAIT ASIC3_CONFIG_GPIO(45, 1, 1, 0) -#define ASIC3_GPIOC14_nPIOIS16 ASIC3_CONFIG_GPIO(46, 1, 1, 0) -#define ASIC3_GPIOC15_nPIOR ASIC3_CONFIG_GPIO(47, 1, 0, 0) -#define ASIC3_GPIOD4_CF_nCD ASIC3_CONFIG_GPIO(52, 1, 0, 0) -#define ASIC3_GPIOD11_nCIOIS16 ASIC3_CONFIG_GPIO(59, 1, 0, 0) -#define ASIC3_GPIOD12_nCWAIT ASIC3_CONFIG_GPIO(60, 1, 0, 0) -#define ASIC3_GPIOD15_nPIOW ASIC3_CONFIG_GPIO(63, 1, 0, 0) - - -#define ASIC3_SPI_Base 0x0400 -#define ASIC3_SPI_Control 0x0000 -#define ASIC3_SPI_TxData 0x0004 -#define ASIC3_SPI_RxData 0x0008 -#define ASIC3_SPI_Int 0x000c -#define ASIC3_SPI_Status 0x0010 - -#define SPI_CONTROL_SPR(clk) ((clk) & 0x0f) /* Clock rate */ - -#define ASIC3_PWM_0_Base 0x0500 -#define ASIC3_PWM_1_Base 0x0600 -#define ASIC3_PWM_TimeBase 0x0000 -#define ASIC3_PWM_PeriodTime 0x0004 -#define ASIC3_PWM_DutyTime 0x0008 - -#define PWM_TIMEBASE_VALUE(x) ((x)&0xf) /* Low 4 bits sets time base */ -#define PWM_TIMEBASE_ENABLE (1 << 4) /* Enable clock */ - -#define ASIC3_NUM_LEDS 3 -#define ASIC3_LED_0_Base 0x0700 -#define ASIC3_LED_1_Base 0x0800 -#define ASIC3_LED_2_Base 0x0900 -#define ASIC3_LED_TimeBase 0x0000 /* R/W 7 bits */ -#define ASIC3_LED_PeriodTime 0x0004 /* R/W 12 bits */ -#define ASIC3_LED_DutyTime 0x0008 /* R/W 12 bits */ -#define ASIC3_LED_AutoStopCount 0x000c /* R/W 16 bits */ - -/* LED TimeBase bits - match ASIC2 */ -#define LED_TBS 0x0f /* Low 4 bits sets time base, max = 13 */ - /* Note: max = 5 on hx4700 */ - /* 0: maximum time base */ - /* 1: maximum time base / 2 */ - /* n: maximum time base / 2^n */ - -#define LED_EN (1 << 4) /* LED ON/OFF 0:off, 1:on */ -#define LED_AUTOSTOP (1 << 5) /* LED ON/OFF auto stop 0:disable, 1:enable */ -#define LED_ALWAYS (1 << 6) /* LED Interrupt Mask 0:No mask, 1:mask */ - -#define ASIC3_CLOCK_BASE 0x0A00 -#define ASIC3_CLOCK_CDEX 0x00 -#define ASIC3_CLOCK_SEL 0x04 - -#define CLOCK_CDEX_SOURCE (1 << 0) /* 2 bits */ -#define CLOCK_CDEX_SOURCE0 (1 << 0) -#define CLOCK_CDEX_SOURCE1 (1 << 1) -#define CLOCK_CDEX_SPI (1 << 2) -#define CLOCK_CDEX_OWM (1 << 3) -#define CLOCK_CDEX_PWM0 (1 << 4) -#define CLOCK_CDEX_PWM1 (1 << 5) -#define CLOCK_CDEX_LED0 (1 << 6) -#define CLOCK_CDEX_LED1 (1 << 7) -#define CLOCK_CDEX_LED2 (1 << 8) - -/* Clocks settings: 1 for 24.576 MHz, 0 for 12.288Mhz */ -#define CLOCK_CDEX_SD_HOST (1 << 9) /* R/W: SD host clock source */ -#define CLOCK_CDEX_SD_BUS (1 << 10) /* R/W: SD bus clock source ctrl */ -#define CLOCK_CDEX_SMBUS (1 << 11) -#define CLOCK_CDEX_CONTROL_CX (1 << 12) - -#define CLOCK_CDEX_EX0 (1 << 13) /* R/W: 32.768 kHz crystal */ -#define CLOCK_CDEX_EX1 (1 << 14) /* R/W: 24.576 MHz crystal */ - -#define CLOCK_SEL_SD_HCLK_SEL (1 << 0) /* R/W: SDIO host clock select */ -#define CLOCK_SEL_SD_BCLK_SEL (1 << 1) /* R/W: SDIO bus clock select */ - -/* R/W: INT clock source control (32.768 kHz) */ -#define CLOCK_SEL_CX (1 << 2) - - -#define ASIC3_INTR_BASE 0x0B00 - -#define ASIC3_INTR_INT_MASK 0x00 /* Interrupt mask control */ -#define ASIC3_INTR_P_INT_STAT 0x04 /* Peripheral interrupt status */ -#define ASIC3_INTR_INT_CPS 0x08 /* Interrupt timer clock pre-scale */ -#define ASIC3_INTR_INT_TBS 0x0c /* Interrupt timer set */ - -#define ASIC3_INTMASK_GINTMASK (1 << 0) /* Global INTs mask 1:enable */ -#define ASIC3_INTMASK_GINTEL (1 << 1) /* 1: rising edge, 0: hi level */ -#define ASIC3_INTMASK_MASK0 (1 << 2) -#define ASIC3_INTMASK_MASK1 (1 << 3) -#define ASIC3_INTMASK_MASK2 (1 << 4) -#define ASIC3_INTMASK_MASK3 (1 << 5) -#define ASIC3_INTMASK_MASK4 (1 << 6) -#define ASIC3_INTMASK_MASK5 (1 << 7) - -#define ASIC3_INTR_PERIPHERAL_A (1 << 0) -#define ASIC3_INTR_PERIPHERAL_B (1 << 1) -#define ASIC3_INTR_PERIPHERAL_C (1 << 2) -#define ASIC3_INTR_PERIPHERAL_D (1 << 3) -#define ASIC3_INTR_LED0 (1 << 4) -#define ASIC3_INTR_LED1 (1 << 5) -#define ASIC3_INTR_LED2 (1 << 6) -#define ASIC3_INTR_SPI (1 << 7) -#define ASIC3_INTR_SMBUS (1 << 8) -#define ASIC3_INTR_OWM (1 << 9) - -#define ASIC3_INTR_CPS(x) ((x)&0x0f) /* 4 bits, max 14 */ -#define ASIC3_INTR_CPS_SET (1 << 4) /* Time base enable */ - - -/* Basic control of the SD ASIC */ -#define ASIC3_SDHWCTRL_BASE 0x0E00 -#define ASIC3_SDHWCTRL_SDCONF 0x00 - -#define ASIC3_SDHWCTRL_SUSPEND (1 << 0) /* 1=suspend all SD operations */ -#define ASIC3_SDHWCTRL_CLKSEL (1 << 1) /* 1=SDICK, 0=HCLK */ -#define ASIC3_SDHWCTRL_PCLR (1 << 2) /* All registers of SDIO cleared */ -#define ASIC3_SDHWCTRL_LEVCD (1 << 3) /* SD card detection: 0:low */ - -/* SD card write protection: 0=high */ -#define ASIC3_SDHWCTRL_LEVWP (1 << 4) -#define ASIC3_SDHWCTRL_SDLED (1 << 5) /* SD card LED signal 0=disable */ - -/* SD card power supply ctrl 1=enable */ -#define ASIC3_SDHWCTRL_SDPWR (1 << 6) - -#define ASIC3_EXTCF_BASE 0x1100 - -#define ASIC3_EXTCF_SELECT 0x00 -#define ASIC3_EXTCF_RESET 0x04 - -#define ASIC3_EXTCF_SMOD0 (1 << 0) /* slot number of mode 0 */ -#define ASIC3_EXTCF_SMOD1 (1 << 1) /* slot number of mode 1 */ -#define ASIC3_EXTCF_SMOD2 (1 << 2) /* slot number of mode 2 */ -#define ASIC3_EXTCF_OWM_EN (1 << 4) /* enable onewire module */ -#define ASIC3_EXTCF_OWM_SMB (1 << 5) /* OWM bus selection */ -#define ASIC3_EXTCF_OWM_RESET (1 << 6) /* ?? used by OWM and CF */ -#define ASIC3_EXTCF_CF0_SLEEP_MODE (1 << 7) /* CF0 sleep state */ -#define ASIC3_EXTCF_CF1_SLEEP_MODE (1 << 8) /* CF1 sleep state */ -#define ASIC3_EXTCF_CF0_PWAIT_EN (1 << 10) /* CF0 PWAIT_n control */ -#define ASIC3_EXTCF_CF1_PWAIT_EN (1 << 11) /* CF1 PWAIT_n control */ -#define ASIC3_EXTCF_CF0_BUF_EN (1 << 12) /* CF0 buffer control */ -#define ASIC3_EXTCF_CF1_BUF_EN (1 << 13) /* CF1 buffer control */ -#define ASIC3_EXTCF_SD_MEM_ENABLE (1 << 14) -#define ASIC3_EXTCF_CF_SLEEP (1 << 15) /* CF sleep mode control */ - -/********************************************* - * The Onewire interface (DS1WM) is handled - * by the ds1wm driver. - * - *********************************************/ - -#define ASIC3_OWM_BASE 0xC00 - -/***************************************************************************** - * The SD configuration registers are at a completely different location - * in memory. They are divided into three sets of registers: - * - * SD_CONFIG Core configuration register - * SD_CTRL Control registers for SD operations - * SDIO_CTRL Control registers for SDIO operations - * - *****************************************************************************/ -#define ASIC3_SD_CONFIG_BASE 0x0400 /* Assumes 32 bit addressing */ -#define ASIC3_SD_CONFIG_SIZE 0x0200 /* Assumes 32 bit addressing */ -#define ASIC3_SD_CTRL_BASE 0x1000 -#define ASIC3_SDIO_CTRL_BASE 0x1200 - -#define ASIC3_MAP_SIZE_32BIT 0x2000 -#define ASIC3_MAP_SIZE_16BIT 0x1000 - -/* Functions needed by leds-asic3 */ - -struct asic3; -extern void asic3_write_register(struct asic3 *asic, unsigned int reg, u32 val); -extern u32 asic3_read_register(struct asic3 *asic, unsigned int reg); - -#endif /* __ASIC3_H__ */ diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h deleted file mode 100644 index ae3e7a5c5219..000000000000 --- a/include/linux/mfd/t7l66xb.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * This file contains the definitions for the T7L66XB - * - * (C) Copyright 2005 Ian Molton - */ -#ifndef MFD_T7L66XB_H -#define MFD_T7L66XB_H - -#include -#include - -struct t7l66xb_platform_data { - int (*enable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - - int irq_base; /* The base for subdevice irqs */ - - struct tmio_nand_data *nand_data; -}; - - -#define IRQ_T7L66XB_MMC (1) -#define IRQ_T7L66XB_NAND (3) - -#define T7L66XB_NR_IRQS 8 - -#endif diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h deleted file mode 100644 index aacf1dcc86b9..000000000000 --- a/include/linux/mfd/tc6387xb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * This file contains the definitions for the TC6387XB - * - * (C) Copyright 2005 Ian Molton - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - */ -#ifndef MFD_TC6387XB_H -#define MFD_TC6387XB_H - -struct tc6387xb_platform_data { - int (*enable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); -}; - -#endif diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h deleted file mode 100644 index d17807f2d0c9..000000000000 --- a/include/linux/mfd/tc6393xb.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Toshiba TC6393XB SoC support - * - * Copyright(c) 2005-2006 Chris Humbert - * Copyright(c) 2005 Dirk Opfer - * Copyright(c) 2005 Ian Molton - * Copyright(c) 2007 Dmitry Baryshkov - * - * Based on code written by Sharp/Lineo for 2.4 kernels - * Based on locomo.c - */ - -#ifndef MFD_TC6393XB_H -#define MFD_TC6393XB_H - -#include - -/* Also one should provide the CK3P6MI clock */ -struct tc6393xb_platform_data { - u16 scr_pll2cr; /* PLL2 Control */ - u16 scr_gper; /* GP Enable */ - - int (*enable)(struct platform_device *dev); - void (*disable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - - int irq_base; /* base for subdevice irqs */ - - struct tmio_nand_data *nand_data; - struct tmio_fb_data *fb_data; - - unsigned resume_restore : 1; /* make special actions - to preserve the state - on suspend/resume */ -}; - -extern int tc6393xb_lcd_mode(struct platform_device *fb, - const struct fb_videomode *mode); -extern int tc6393xb_lcd_set_power(struct platform_device *fb, bool on); - -/* - * Relative to irq_base - */ -#define IRQ_TC6393_NAND 0 -#define IRQ_TC6393_MMC 1 -#define IRQ_TC6393_OHCI 2 -#define IRQ_TC6393_FB 4 - -#define TC6393XB_NR_IRQS 8 - -#endif diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index e8bf90281ba0..eace8ea6cda0 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -84,11 +84,6 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) -int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); -int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); -void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); - struct dma_chan; /* -- cgit v1.2.3 From 2e99b1b065fb9c6e771ce573cf6c2dbb52c14627 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Oct 2022 22:52:32 +0200 Subject: mfd: remove ucb1400 support The ucb1400 MFD driver and its gpio and touchscreen child drivers were only used on a few PXA machines that were unused for a while and are now removed. Removing these leaves the AC97 support as ALSA specific, no other drivers are now connected through this interface. Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Dmitry Torokhov Cc: Lee Jones Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Marek Vasut Cc: linux-kernel@vger.kernel.org Cc: linux-gpio@vger.kernel.org Cc: linux-input@vger.kernel.org Cc: alsa-devel@alsa-project.org Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/ucb1400.h | 160 ------------------------------------------------ 1 file changed, 160 deletions(-) delete mode 100644 include/linux/ucb1400.h (limited to 'include/linux') diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h deleted file mode 100644 index 2516082cd3a9..000000000000 --- a/include/linux/ucb1400.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Register definitions and functions for: - * Philips UCB1400 driver - * - * Based on ucb1400_ts: - * Author: Nicolas Pitre - * Created: September 25, 2006 - * Copyright: MontaVista Software, Inc. - * - * Spliting done by: Marek Vasut - * If something doesn't work and it worked before spliting, e-mail me, - * dont bother Nicolas please ;-) - * - * This code is heavily based on ucb1x00-*.c copyrighted by Russell King - * covering the UCB1100, UCB1200 and UCB1300.. Support for the UCB1400 has - * been made separate from ucb1x00-core/ucb1x00-ts on Russell's request. - */ - -#ifndef _LINUX__UCB1400_H -#define _LINUX__UCB1400_H - -#include -#include -#include -#include - -/* - * UCB1400 AC-link registers - */ - -#define UCB_IO_DATA 0x5a -#define UCB_IO_DIR 0x5c -#define UCB_IE_RIS 0x5e -#define UCB_IE_FAL 0x60 -#define UCB_IE_STATUS 0x62 -#define UCB_IE_CLEAR 0x62 -#define UCB_IE_ADC (1 << 11) -#define UCB_IE_TSPX (1 << 12) - -#define UCB_TS_CR 0x64 -#define UCB_TS_CR_TSMX_POW (1 << 0) -#define UCB_TS_CR_TSPX_POW (1 << 1) -#define UCB_TS_CR_TSMY_POW (1 << 2) -#define UCB_TS_CR_TSPY_POW (1 << 3) -#define UCB_TS_CR_TSMX_GND (1 << 4) -#define UCB_TS_CR_TSPX_GND (1 << 5) -#define UCB_TS_CR_TSMY_GND (1 << 6) -#define UCB_TS_CR_TSPY_GND (1 << 7) -#define UCB_TS_CR_MODE_INT (0 << 8) -#define UCB_TS_CR_MODE_PRES (1 << 8) -#define UCB_TS_CR_MODE_POS (2 << 8) -#define UCB_TS_CR_BIAS_ENA (1 << 11) -#define UCB_TS_CR_TSPX_LOW (1 << 12) -#define UCB_TS_CR_TSMX_LOW (1 << 13) - -#define UCB_ADC_CR 0x66 -#define UCB_ADC_SYNC_ENA (1 << 0) -#define UCB_ADC_VREFBYP_CON (1 << 1) -#define UCB_ADC_INP_TSPX (0 << 2) -#define UCB_ADC_INP_TSMX (1 << 2) -#define UCB_ADC_INP_TSPY (2 << 2) -#define UCB_ADC_INP_TSMY (3 << 2) -#define UCB_ADC_INP_AD0 (4 << 2) -#define UCB_ADC_INP_AD1 (5 << 2) -#define UCB_ADC_INP_AD2 (6 << 2) -#define UCB_ADC_INP_AD3 (7 << 2) -#define UCB_ADC_EXT_REF (1 << 5) -#define UCB_ADC_START (1 << 7) -#define UCB_ADC_ENA (1 << 15) - -#define UCB_ADC_DATA 0x68 -#define UCB_ADC_DAT_VALID (1 << 15) - -#define UCB_FCSR 0x6c -#define UCB_FCSR_AVE (1 << 12) - -#define UCB_ADC_DAT_MASK 0x3ff - -#define UCB_ID 0x7e -#define UCB_ID_1400 0x4304 - -struct ucb1400_gpio { - struct gpio_chip gc; - struct snd_ac97 *ac97; - int gpio_offset; -}; - -struct ucb1400_ts { - struct input_dev *ts_idev; - int id; - int irq; - struct snd_ac97 *ac97; - wait_queue_head_t ts_wait; - bool stopped; -}; - -struct ucb1400 { - struct platform_device *ucb1400_ts; - struct platform_device *ucb1400_gpio; -}; - -struct ucb1400_pdata { - int irq; - int gpio_offset; - int (*gpio_setup)(struct device *dev, int ngpio); - int (*gpio_teardown)(struct device *dev, int ngpio); -}; - -static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg) -{ - return ac97->bus->ops->read(ac97, reg); -} - -static inline void ucb1400_reg_write(struct snd_ac97 *ac97, u16 reg, u16 val) -{ - ac97->bus->ops->write(ac97, reg, val); -} - -static inline u16 ucb1400_gpio_get_value(struct snd_ac97 *ac97, u16 gpio) -{ - return ucb1400_reg_read(ac97, UCB_IO_DATA) & (1 << gpio); -} - -static inline void ucb1400_gpio_set_value(struct snd_ac97 *ac97, u16 gpio, - u16 val) -{ - ucb1400_reg_write(ac97, UCB_IO_DATA, val ? - ucb1400_reg_read(ac97, UCB_IO_DATA) | (1 << gpio) : - ucb1400_reg_read(ac97, UCB_IO_DATA) & ~(1 << gpio)); -} - -static inline u16 ucb1400_gpio_get_direction(struct snd_ac97 *ac97, u16 gpio) -{ - return ucb1400_reg_read(ac97, UCB_IO_DIR) & (1 << gpio); -} - -static inline void ucb1400_gpio_set_direction(struct snd_ac97 *ac97, u16 gpio, - u16 dir) -{ - ucb1400_reg_write(ac97, UCB_IO_DIR, dir ? - ucb1400_reg_read(ac97, UCB_IO_DIR) | (1 << gpio) : - ucb1400_reg_read(ac97, UCB_IO_DIR) & ~(1 << gpio)); -} - -static inline void ucb1400_adc_enable(struct snd_ac97 *ac97) -{ - ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA); -} - -static inline void ucb1400_adc_disable(struct snd_ac97 *ac97) -{ - ucb1400_reg_write(ac97, UCB_ADC_CR, 0); -} - - -unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, - int adcsync); - -#endif -- cgit v1.2.3 From 61d9420a213826e0088b6e377b55237f1c53887d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 7 Oct 2022 12:13:29 +0200 Subject: mfd: remove htc-pasic3 driver The htc-pasic3 MFD device was only used in the PXA magician machine that is now removed, so this can be recycled as well. Cc: Lee Jones Cc: Philipp Zabel Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- include/linux/mfd/htc-pasic3.h | 54 ------------------------------------------ 1 file changed, 54 deletions(-) delete mode 100644 include/linux/mfd/htc-pasic3.h (limited to 'include/linux') diff --git a/include/linux/mfd/htc-pasic3.h b/include/linux/mfd/htc-pasic3.h deleted file mode 100644 index 3d3ed67bd969..000000000000 --- a/include/linux/mfd/htc-pasic3.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * HTC PASIC3 driver - LEDs and DS1WM - * - * Copyright (c) 2007 Philipp Zabel - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive for - * more details. - * - */ - -#ifndef __PASIC3_H -#define __PASIC3_H - -#include -#include - -extern void pasic3_write_register(struct device *dev, u32 reg, u8 val); -extern u8 pasic3_read_register(struct device *dev, u32 reg); - -/* - * mask for registers 0x20,0x21,0x22 - */ -#define PASIC3_MASK_LED0 0x04 -#define PASIC3_MASK_LED1 0x08 -#define PASIC3_MASK_LED2 0x40 - -/* - * bits in register 0x06 - */ -#define PASIC3_BIT2_LED0 0x08 -#define PASIC3_BIT2_LED1 0x10 -#define PASIC3_BIT2_LED2 0x20 - -struct pasic3_led { - struct led_classdev led; - unsigned int hw_num; - unsigned int bit2; - unsigned int mask; - struct pasic3_leds_machinfo *pdata; -}; - -struct pasic3_leds_machinfo { - unsigned int num_leds; - unsigned int power_gpio; - struct pasic3_led *leds; -}; - -struct pasic3_platform_data { - struct pasic3_leds_machinfo *led_pdata; - unsigned int clock_rate; -}; - -#endif -- cgit v1.2.3 From b5018dd5aa7d3418755a758210c3195d9bd1c98e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 12 Jan 2023 09:37:43 +0100 Subject: ARM: s3c: remove obsolete s3c-cpu-freq header The s3c-cpu-freq header was previously included by: ./arch/arm/mach-s3c/mach-bast.c ./arch/arm/mach-s3c/mach-osiris-dvs.c ./arch/arm/mach-s3c/mach-osiris.c ./include/linux/soc/samsung/s3c-cpufreq-core.h Commit a4946a153cb9 ("ARM: s3c: remove all s3c24xx support") removes the files in ./arch/arm/mach-s3c/; commit daf0ee583fc7 ("cpufreq: remove s3c24xx drivers") removes the file s3c-cpufreq-core.h. Remove this obsolete header file. This issue was identified, as s3c-cpu-freq.h referred to the removed config ARM_S3C_CPUFREQ. Signed-off-by: Lukas Bulwahn Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- include/linux/soc/samsung/s3c-cpu-freq.h | 145 ------------------------------- 1 file changed, 145 deletions(-) delete mode 100644 include/linux/soc/samsung/s3c-cpu-freq.h (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-cpu-freq.h b/include/linux/soc/samsung/s3c-cpu-freq.h deleted file mode 100644 index 63e88fd5dea2..000000000000 --- a/include/linux/soc/samsung/s3c-cpu-freq.h +++ /dev/null @@ -1,145 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2006-2007 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * S3C CPU frequency scaling support - driver and board - */ -#ifndef __LINUX_SOC_SAMSUNG_S3C_CPU_FREQ_H -#define __LINUX_SOC_SAMSUNG_S3C_CPU_FREQ_H - -#include - -struct s3c_cpufreq_info; -struct s3c_cpufreq_board; -struct s3c_iotimings; - -/** - * struct s3c_freq - frequency information (mainly for core drivers) - * @fclk: The FCLK frequency in Hz. - * @armclk: The ARMCLK frequency in Hz. - * @hclk_tns: HCLK cycle time in 10ths of nano-seconds. - * @hclk: The HCLK frequency in Hz. - * @pclk: The PCLK frequency in Hz. - * - * This contains the frequency information about the current configuration - * mainly for the core drivers to ensure we do not end up passing about - * a large number of parameters. - * - * The @hclk_tns field is a useful cache for the parts of the drivers that - * need to calculate IO timings and suchlike. - */ -struct s3c_freq { - unsigned long fclk; - unsigned long armclk; - unsigned long hclk_tns; /* in 10ths of ns */ - unsigned long hclk; - unsigned long pclk; -}; - -/** - * struct s3c_cpufreq_freqs - s3c cpufreq notification information. - * @freqs: The cpufreq setting information. - * @old: The old clock settings. - * @new: The new clock settings. - * @pll_changing: Set if the PLL is changing. - * - * Wrapper 'struct cpufreq_freqs' so that any drivers receiving the - * notification can use this information that is not provided by just - * having the core frequency alone. - * - * The pll_changing flag is used to indicate if the PLL itself is - * being set during this change. This is important as the clocks - * will temporarily be set to the XTAL clock during this time, so - * drivers may want to close down their output during this time. - * - * Note, this is not being used by any current drivers and therefore - * may be removed in the future. - */ -struct s3c_cpufreq_freqs { - struct cpufreq_freqs freqs; - struct s3c_freq old; - struct s3c_freq new; - - unsigned int pll_changing:1; -}; - -#define to_s3c_cpufreq(_cf) container_of(_cf, struct s3c_cpufreq_freqs, freqs) - -/** - * struct s3c_clkdivs - clock divisor information - * @p_divisor: Divisor from FCLK to PCLK. - * @h_divisor: Divisor from FCLK to HCLK. - * @arm_divisor: Divisor from FCLK to ARMCLK (not all CPUs). - * @dvs: Non-zero if using DVS mode for ARMCLK. - * - * Divisor settings for the core clocks. - */ -struct s3c_clkdivs { - int p_divisor; - int h_divisor; - int arm_divisor; - unsigned char dvs; -}; - -#define PLLVAL(_m, _p, _s) (((_m) << 12) | ((_p) << 4) | (_s)) - -/** - * struct s3c_pllval - PLL value entry. - * @freq: The frequency for this entry in Hz. - * @pll_reg: The PLL register setting for this PLL value. - */ -struct s3c_pllval { - unsigned long freq; - unsigned long pll_reg; -}; - -/** - * struct s3c_cpufreq_board - per-board cpu frequency informatin - * @refresh: The SDRAM refresh period in nanoseconds. - * @auto_io: Set if the IO timing settings should be generated from the - * initialisation time hardware registers. - * @need_io: Set if the board has external IO on any of the chipselect - * lines that will require the hardware timing registers to be - * updated on a clock change. - * @max: The maxium frequency limits for the system. Any field that - * is left at zero will use the CPU's settings. - * - * This contains the board specific settings that affect how the CPU - * drivers chose settings. These include the memory refresh and IO - * timing information. - * - * Registration depends on the driver being used, the ARMCLK only - * implementation does not currently need this but the older style - * driver requires this to be available. - */ -struct s3c_cpufreq_board { - unsigned int refresh; - unsigned int auto_io:1; /* automatically init io timings. */ - unsigned int need_io:1; /* set if needs io timing support. */ - - /* any non-zero field in here is taken as an upper limit. */ - struct s3c_freq max; /* frequency limits */ -}; - -/* Things depending on frequency scaling. */ -#ifdef CONFIG_ARM_S3C_CPUFREQ -#define __init_or_cpufreq -#else -#define __init_or_cpufreq __init -#endif - -/* Board functions */ - -#ifdef CONFIG_ARM_S3C_CPUFREQ -extern int s3c_cpufreq_setboard(struct s3c_cpufreq_board *board); -#else - -static inline int s3c_cpufreq_setboard(struct s3c_cpufreq_board *board) -{ - return 0; -} -#endif /* CONFIG_ARM_S3C_CPUFREQ */ - -#endif -- cgit v1.2.3 From 8b3517f88ff2983f52698893519227c10aac90b2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 1 Feb 2023 12:30:18 +0800 Subject: PCI: loongson: Prevent LS7A MRRS increases Except for isochronous-configured devices, software may set Max_Read_Request_Size (MRRS) to any value up to 4096. If a device issues a read request with size greater than the completer's Max_Payload_Size (MPS), the completer is required to break the response into multiple completions. Instead of correctly responding with multiple completions to a large read request, some LS7A Root Ports respond with a Completer Abort. To prevent this, the MRRS must be limited to an implementation-specific value. The OS cannot detect that value, so rely on BIOS to configure MRRS before booting, and quirk the Root Ports so we never set an MRRS larger than that BIOS value for any downstream device. N.B. Hot-added devices are not configured by BIOS, and they power up with MRRS = 512 bytes, so these devices will be limited to 512 bytes. If the LS7A limit is smaller, those hot-added devices may not work correctly, but per [1], hotplug is not supported with this chipset revision. [1] https://lore.kernel.org/r/073638a7-ae68-2847-ac3d-29e5e760d6af@loongson.cn [bhelgaas: commit log] Link: https://bugzilla.kernel.org/show_bug.cgi?id=216884 Link: https://lore.kernel.org/r/20230201043018.778499-3-chenhuacai@loongson.cn Signed-off-by: Huacai Chen Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..3df2049ec4a8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -572,6 +572,7 @@ struct pci_host_bridge { void *release_data; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ + unsigned int no_inc_mrrs:1; /* No Increase MRRS */ unsigned int native_aer:1; /* OS may use PCIe AER */ unsigned int native_pcie_hotplug:1; /* OS may use PCIe hotplug */ unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */ -- cgit v1.2.3 From 37e98d9bedb50644654fd196e38acad49903fadc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Feb 2023 09:33:49 +0100 Subject: driver core: bus: move lock_class_key into dynamic structure Move the lock_class_key structure out of struct bus_type and into the dynamic structure we create already for all bus_types registered with the kernel. This saves on static space and removes one more writable field in struct bus_type. In the future, the same field can be moved out of the struct class logic because it shares this same private structure. Most everyone will never notice this change, as lockdep is not enabled in real systems so no memory or logic changes are happening for them. Cc: "Rafael J. Wysocki" Acked-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20230201083349.4038660-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 87e4d029c915..e3094db1e9fa 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -112,7 +112,6 @@ struct bus_type { const struct iommu_ops *iommu_ops; struct subsys_private *p; - struct lock_class_key lock_key; bool need_parent_lock; }; -- cgit v1.2.3 From 9f2ad955f98366003e887e32bdd4ea98ef2a04f4 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 1 Feb 2023 21:10:26 +0100 Subject: Revert "at86rf230: convert to gpio descriptors" This reverts commit 622bd6ea90086beb98ac439edd7d57de73d1d6f9. Dmitry Torokhov points out that this conversion leaves an existing board in reset state due to not properly handled polarity. Additionally, the GPIO name inadvertenly changes from "reset-gpio" to "rstn-gpios". Revert to avoid these regressions. Follow up patches for a better conversion are applied as well. Signed-off-by: Stefan Schmidt --- include/linux/spi/at86rf230.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/spi/at86rf230.h (limited to 'include/linux') diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h new file mode 100644 index 000000000000..d278576ab692 --- /dev/null +++ b/include/linux/spi/at86rf230.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AT86RF230/RF231 driver + * + * Copyright (C) 2009-2012 Siemens AG + * + * Written by: + * Dmitry Eremin-Solenikov + */ +#ifndef AT86RF230_H +#define AT86RF230_H + +struct at86rf230_platform_data { + int rstn; + int slp_tr; + int dig2; + u8 xtal_trim; +}; + +#endif -- cgit v1.2.3 From 9b26ed18545056794bf9d3f3defd9e09719ff4f5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 31 Jan 2023 21:34:46 -0800 Subject: ieee802154: at86rf230: drop support for platform data There are no users of platform data in the mainline tree, and new boards should use either ACPI or device tree, so let's stop supporting it. This will help with converting the driver to gpiod API. Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20230201053447.4098486-1-dmitry.torokhov@gmail.com Signed-off-by: Stefan Schmidt --- include/linux/spi/at86rf230.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/spi/at86rf230.h (limited to 'include/linux') diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h deleted file mode 100644 index d278576ab692..000000000000 --- a/include/linux/spi/at86rf230.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AT86RF230/RF231 driver - * - * Copyright (C) 2009-2012 Siemens AG - * - * Written by: - * Dmitry Eremin-Solenikov - */ -#ifndef AT86RF230_H -#define AT86RF230_H - -struct at86rf230_platform_data { - int rstn; - int slp_tr; - int dig2; - u8 xtal_trim; -}; - -#endif -- cgit v1.2.3 From 57e7c169cd6afa093d858b8edfb9bceaf2e1c93b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 1 Feb 2023 11:30:13 -0600 Subject: bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs kfuncs are functions defined in the kernel, which may be invoked by BPF programs. They may or may not also be used as regular kernel functions, implying that they may be static (in which case the compiler could e.g. inline it away, or elide one or more arguments), or it could have external linkage, but potentially be elided in an LTO build if a function is observed to never be used, and is stripped from the final kernel binary. This has already resulted in some issues, such as those discussed in [0] wherein changes in DWARF that identify when a parameter has been optimized out can break BTF encodings (and in general break the kfunc). [0]: https://lore.kernel.org/all/1675088985-20300-2-git-send-email-alan.maguire@oracle.com/ We therefore require some convenience macro that kfunc developers can use just add to their kfuncs, and which will prevent all of the above issues from happening. This is in contrast with what we have today, where some kfunc definitions have "noinline", some have "__used", and others are static and have neither. Note that longer term, this mechanism may be replaced by a macro that more closely resembles EXPORT_SYMBOL_GPL(), as described in [1]. For now, we're going with this shorter-term approach to fix existing issues in kfuncs. [1]: https://lore.kernel.org/lkml/Y9AFT4pTydKh+PD3@maniforge.lan/ Note as well that checkpatch complains about this patch with the following: ERROR: Macros with complex values should be enclosed in parentheses +#define __bpf_kfunc __used noinline There seems to be a precedent for using this pattern in other places such as compiler_types.h (see e.g. __randomize_layout and noinstr), so it seems appropriate. Signed-off-by: David Vernet Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230201173016.342758-2-void@manifault.com --- include/linux/btf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index e9b90d9c3569..49e0fe6d8274 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -72,6 +72,14 @@ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ #define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ +/* + * Tag marking a kernel function as a kfunc. This is meant to minimize the + * amount of copy-paste that kfunc authors have to include for correctness so + * as to avoid issues such as the compiler inlining or eliding either a static + * kfunc, or a global kfunc in an LTO build. + */ +#define __bpf_kfunc __used noinline + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit -- cgit v1.2.3 From 058a8f7f73aae1cc22b53fcefec031b9e391b54d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Jan 2023 10:58:30 -0500 Subject: net: add a couple of helpers for iph tot_len This patch adds three APIs to replace the iph->tot_len setting and getting in all places where IPv4 BIG TCP packets may reach, they will be used in the following patches. Note that iph_totlen() will be used when iph is not in linear data of the skb. Signed-off-by: Xin Long Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/ip.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 3d9c6750af62..d11c25f5030a 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -35,4 +35,25 @@ static inline unsigned int ip_transport_len(const struct sk_buff *skb) { return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb); } + +static inline unsigned int iph_totlen(const struct sk_buff *skb, const struct iphdr *iph) +{ + u32 len = ntohs(iph->tot_len); + + return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ? + len : skb->len - skb_network_offset(skb); +} + +static inline unsigned int skb_ip_totlen(const struct sk_buff *skb) +{ + return iph_totlen(skb, ip_hdr(skb)); +} + +/* IPv4 datagram length is stored into 16bit field (tot_len) */ +#define IP_MAX_MTU 0xFFFFU + +static inline void iph_set_totlen(struct iphdr *iph, unsigned int len) +{ + iph->tot_len = len <= IP_MAX_MTU ? htons(len) : 0; +} #endif /* _LINUX_IP_H */ -- cgit v1.2.3 From 9eefedd58ae1daece2ba907849a44db2941fb4b0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Jan 2023 10:58:38 -0500 Subject: net: add gso_ipv4_max_size and gro_ipv4_max_size per device This patch introduces gso_ipv4_max_size and gro_ipv4_max_size per device and adds netlink attributes for them, so that IPV4 BIG TCP can be guarded by a separate tunable in the next patch. To not break the old application using "gso/gro_max_size" for IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size" in netif_set_gso/gro_max_size() if the new size isn't greater than GSO_LEGACY_MAX_SIZE, so that nothing will change even if userspace doesn't realize the new netlink attributes. Signed-off-by: Xin Long Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2466afa25078..d5ef4c1fedd2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1964,6 +1964,8 @@ enum netdev_ml_priv_type { * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO * @tso_max_segs: Device (as in HW) limit on the max TSO segment count + * @gso_ipv4_max_size: Maximum size of generic segmentation offload, + * for IPv4. * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -2004,6 +2006,8 @@ enum netdev_ml_priv_type { * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) + * @gro_ipv4_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO), for IPv4. * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2207,6 +2211,7 @@ struct net_device { */ #define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -2330,6 +2335,7 @@ struct net_device { u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; + unsigned int gso_ipv4_max_size; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; -- cgit v1.2.3 From 028fb19c6ba743ed308ba99ac325afa968795e0f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 31 Jan 2023 15:31:57 +0200 Subject: netlink: provide an ability to set default extack message In netdev common pattern, extack pointer is forwarded to the drivers to be filled with error message. However, the caller can easily overwrite the filled message. Instead of adding multiple "if (!extack->_msg)" checks before any NL_SET_ERR_MSG() call, which appears after call to the driver, let's add new macro to common code. [1] https://lore.kernel.org/all/Y9Irgrgf3uxOjwUm@unreal Reviewed-by: Simon Horman Reviewed-by: Nikolay Aleksandrov Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/6993fac557a40a1973dfa0095107c3d03d40bec1.1675171790.git.leon@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index fa4d86da0ec7..c43ac7690eca 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -130,6 +130,16 @@ struct netlink_ext_ack { #define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \ NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args) +#define NL_SET_ERR_MSG_WEAK(extack, msg) do { \ + if ((extack) && !(extack)->_msg) \ + NL_SET_ERR_MSG((extack), msg); \ +} while (0) + +#define NL_SET_ERR_MSG_WEAK_MOD(extack, msg) do { \ + if ((extack) && !(extack)->_msg) \ + NL_SET_ERR_MSG_MOD((extack), msg); \ +} while (0) + #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ -- cgit v1.2.3 From b38b17b6a01ca4e738af097a1529910646ef4270 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 1 Feb 2023 09:36:44 +0800 Subject: ceph: move mount state enum to super.h These flags are only used in ceph filesystem in fs/ceph, so just move it to the place it should be. Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 00af2c98da75..4497d0a6772c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -99,16 +99,6 @@ struct ceph_options { #define CEPH_AUTH_NAME_DEFAULT "guest" -/* mount state */ -enum { - CEPH_MOUNT_MOUNTING, - CEPH_MOUNT_MOUNTED, - CEPH_MOUNT_UNMOUNTING, - CEPH_MOUNT_UNMOUNTED, - CEPH_MOUNT_SHUTDOWN, - CEPH_MOUNT_RECOVER, -}; - static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) { return timeout ?: MAX_SCHEDULE_TIMEOUT; -- cgit v1.2.3 From dd3b3d160ea7004091051da60e86f36f40970888 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Jan 2023 19:17:03 +0100 Subject: thermal: ACPI: Make helpers retrieve temperature only It is slightly better to make the ACPI thermal helper functions retrieve the trip point temperature only instead of doing the full trip point initialization, because they are also used for updating some already registered trip points, in which case initializing a new trip just in order to update the temperature of an existing one is somewhat wasteful. Modify the ACPI thermal helpers accordingly and update their users. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano --- include/linux/thermal.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 23c2617156b5..2bb4bf33f4f3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -347,11 +347,10 @@ int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); #ifdef CONFIG_THERMAL_ACPI -int thermal_acpi_trip_active(struct acpi_device *adev, int id, - struct thermal_trip *trip); -int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip); -int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip); -int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip); +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL -- cgit v1.2.3 From de82f60f9c86b72635ce49f7ab822e6a00a90dca Mon Sep 17 00:00:00 2001 From: Michael Bottini Date: Thu, 19 Jan 2023 19:15:19 -0800 Subject: PCI/ASPM: Add pci_enable_link_state() Add pci_enable_link_state() to allow devices to change the default BIOS configured states. Clears the BIOS default settings then sets the new states and reconfigures the link under the semaphore. Also add PCIE_LINK_STATE_ALL macro for convenience for callers that want to enable all link states. Link: https://lore.kernel.org/r/20230120031522.2304439-2-david.e.box@linux.intel.com Signed-off-by: Michael Bottini Signed-off-by: David E. Box Signed-off-by: Lorenzo Pieralisi Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..ea601e6fbbda 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1685,10 +1685,15 @@ extern bool pcie_ports_native; #define PCIE_LINK_STATE_L1_2 BIT(4) #define PCIE_LINK_STATE_L1_1_PCIPM BIT(5) #define PCIE_LINK_STATE_L1_2_PCIPM BIT(6) +#define PCIE_LINK_STATE_ALL (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |\ + PCIE_LINK_STATE_CLKPM | PCIE_LINK_STATE_L1_1 |\ + PCIE_LINK_STATE_L1_2 | PCIE_LINK_STATE_L1_1_PCIPM |\ + PCIE_LINK_STATE_L1_2_PCIPM) #ifdef CONFIG_PCIEASPM int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); +int pci_enable_link_state(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1697,6 +1702,8 @@ static inline int pci_disable_link_state(struct pci_dev *pdev, int state) { return 0; } static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } -- cgit v1.2.3 From acbc661032b8aa0e8359ac77074769ade34a176c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:52 -0800 Subject: powercap: idle_inject: Add update callback The powercap/idle_inject core uses play_idle_precise() to inject idle time. But play_idle_precise() can't ensure that the CPU is fully idle for the specified duration because of wakeups due to interrupts. To compensate for the reduced idle time due to these wakes, the caller can adjust requested idle time for the next cycle. The goal of idle injection is to keep system at some idle percent on average, so this is fine to overshoot or undershoot instantaneous idle times. The idle inject core provides an interface idle_inject_set_duration() to set idle and runtime duration. Some architectures provide interface to get actual idle time observed by the hardware. So, the effective idle percent can be adjusted using the hardware feedback. For example, Intel CPUs provides package idle counters, which is currently used by Intel powerclamp driver to readjust runtime duration. When the caller's desired idle time over a period is less or greater than the actual CPU idle time observed by the hardware, caller can readjust idle and runtime duration for the next cycle. The only way this can be done currently is by monitoring hardware idle time from a different software thread and readjust idle and runtime duration using idle_inject_set_duration(). This can be avoided by adding a callback which callers can register and readjust from this callback function. Add a capability to register an optional update() callback, which can be called from the idle inject core before waking up CPUs for idle injection. This callback can be registered via a new interface: idle_inject_register_full(). During this process of constantly adjusting idle and runtime duration there can be some cases where actual idle time is more than the desired. In this case idle inject can be skipped for a cycle. If update() callback returns false, then the idle inject core skips waking up CPUs for the idle injection. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- include/linux/idle_inject.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index fb88e23a99d3..a85d5dd40f72 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -13,6 +13,9 @@ struct idle_inject_device; struct idle_inject_device *idle_inject_register(struct cpumask *cpumask); +struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, + bool (*update)(void)); + void idle_inject_unregister(struct idle_inject_device *ii_dev); int idle_inject_start(struct idle_inject_device *ii_dev); -- cgit v1.2.3 From 3f88b459a729ea397aa7cec9a7054a978882370a Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 2 Dec 2022 23:33:20 +0100 Subject: platform/surface: aggregator: Improve documentation and handling of message target and source IDs The `tid_in` and `tid_out` fields of the serial hub protocol command struct (struct ssh_command) are actually source and target IDs, indicating the peer from which the message originated and the peer for which it is intended. Change the naming of those fields accordingly and improve the protocol documentation. Additionally, introduce an enum containing all currently known peers, i.e. targets and sources. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221202223327.690880-3-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/controller.h | 4 +-- include/linux/surface_aggregator/serial_hub.h | 40 ++++++++++++++++++--------- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index d11a1c6e3186..8932bc0bae18 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -912,10 +912,10 @@ enum ssam_event_mask { }) #define SSAM_EVENT_REGISTRY_SAM \ - SSAM_EVENT_REGISTRY(SSAM_SSH_TC_SAM, 0x01, 0x0b, 0x0c) + SSAM_EVENT_REGISTRY(SSAM_SSH_TC_SAM, SSAM_SSH_TID_SAM, 0x0b, 0x0c) #define SSAM_EVENT_REGISTRY_KIP \ - SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, 0x02, 0x27, 0x28) + SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, SSAM_SSH_TID_KIP, 0x27, 0x28) #define SSAM_EVENT_REGISTRY_REG(tid)\ SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, tid, 0x01, 0x02) diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h index 45501b6e54e8..5c4ae1a26183 100644 --- a/include/linux/surface_aggregator/serial_hub.h +++ b/include/linux/surface_aggregator/serial_hub.h @@ -83,23 +83,21 @@ enum ssh_payload_type { /** * struct ssh_command - Payload of a command-type frame. - * @type: The type of the payload. See &enum ssh_payload_type. Should be - * SSH_PLD_TYPE_CMD for this struct. - * @tc: Command target category. - * @tid_out: Output target ID. Should be zero if this an incoming (EC to host) - * message. - * @tid_in: Input target ID. Should be zero if this is an outgoing (host to - * EC) message. - * @iid: Instance ID. - * @rqid: Request ID. Used to match requests with responses and differentiate - * between responses and events. - * @cid: Command ID. + * @type: The type of the payload. See &enum ssh_payload_type. Should be + * SSH_PLD_TYPE_CMD for this struct. + * @tc: Command target category. + * @tid: Target ID. Indicates the target of the message. + * @sid: Source ID. Indicates the source of the message. + * @iid: Instance ID. + * @rqid: Request ID. Used to match requests with responses and differentiate + * between responses and events. + * @cid: Command ID. */ struct ssh_command { u8 type; u8 tc; - u8 tid_out; - u8 tid_in; + u8 tid; + u8 sid; u8 iid; __le16 rqid; u8 cid; @@ -280,6 +278,22 @@ struct ssam_span { size_t len; }; +/** + * enum ssam_ssh_tid - Target/source IDs for Serial Hub messages. + * @SSAM_SSH_TID_HOST: We as the kernel Serial Hub driver. + * @SSAM_SSH_TID_SAM: The Surface Aggregator EC. + * @SSAM_SSH_TID_KIP: Keyboard and perihperal controller. + * @SSAM_SSH_TID_DEBUG: Debug connector. + * @SSAM_SSH_TID_SURFLINK: SurfLink connector. + */ +enum ssam_ssh_tid { + SSAM_SSH_TID_HOST = 0x00, + SSAM_SSH_TID_SAM = 0x01, + SSAM_SSH_TID_KIP = 0x02, + SSAM_SSH_TID_DEBUG = 0x03, + SSAM_SSH_TID_SURFLINK = 0x04, +}; + /* * Known SSH/EC target categories. * -- cgit v1.2.3 From 78abf1b5205534bb7deda408aa5b9b7e0bf1982e Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 2 Dec 2022 23:33:26 +0100 Subject: platform/surface: aggregator: Enforce use of target-ID enum in device ID macros Similar to the target category (TC), the target ID (TID) can be one value out of a small number of choices, given in enum ssam_ssh_tid. In the device ID macros, SSAM_SDEV() and SSAM_VDEV() we already use text expansion to, both, remove some textual clutter for the target category values and enforce that the value belongs to the known set. Now that we know the names for the target IDs, use the same trick for them as well. Also rename the SSAM_ANY_x macros to SSAM_SSH_x_ANY to better fit in. Signed-off-by: Maximilian Luz Acked-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221202223327.690880-9-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 46c45d1b6368..4da20b7a0ee5 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -68,9 +68,9 @@ struct ssam_device_uid { * match_flags member of the device ID structure. Do not use them directly * with struct ssam_device_id or struct ssam_device_uid. */ -#define SSAM_ANY_TID 0xffff -#define SSAM_ANY_IID 0xffff -#define SSAM_ANY_FUN 0xffff +#define SSAM_SSH_TID_ANY 0xffff +#define SSAM_SSH_IID_ANY 0xffff +#define SSAM_SSH_FUN_ANY 0xffff /** * SSAM_DEVICE() - Initialize a &struct ssam_device_id with the given @@ -83,25 +83,25 @@ struct ssam_device_uid { * * Initializes a &struct ssam_device_id with the given parameters. See &struct * ssam_device_uid for details regarding the parameters. The special values - * %SSAM_ANY_TID, %SSAM_ANY_IID, and %SSAM_ANY_FUN can be used to specify that + * %SSAM_SSH_TID_ANY, %SSAM_SSH_IID_ANY, and %SSAM_SSH_FUN_ANY can be used to specify that * matching should ignore target ID, instance ID, and/or sub-function, * respectively. This macro initializes the ``match_flags`` field based on the * given parameters. * * Note: The parameters @d and @cat must be valid &u8 values, the parameters - * @tid, @iid, and @fun must be either valid &u8 values or %SSAM_ANY_TID, - * %SSAM_ANY_IID, or %SSAM_ANY_FUN, respectively. Other non-&u8 values are not + * @tid, @iid, and @fun must be either valid &u8 values or %SSAM_SSH_TID_ANY, + * %SSAM_SSH_IID_ANY, or %SSAM_SSH_FUN_ANY, respectively. Other non-&u8 values are not * allowed. */ #define SSAM_DEVICE(d, cat, tid, iid, fun) \ - .match_flags = (((tid) != SSAM_ANY_TID) ? SSAM_MATCH_TARGET : 0) \ - | (((iid) != SSAM_ANY_IID) ? SSAM_MATCH_INSTANCE : 0) \ - | (((fun) != SSAM_ANY_FUN) ? SSAM_MATCH_FUNCTION : 0), \ + .match_flags = (((tid) != SSAM_SSH_TID_ANY) ? SSAM_MATCH_TARGET : 0) \ + | (((iid) != SSAM_SSH_IID_ANY) ? SSAM_MATCH_INSTANCE : 0) \ + | (((fun) != SSAM_SSH_FUN_ANY) ? SSAM_MATCH_FUNCTION : 0), \ .domain = d, \ .category = cat, \ - .target = __builtin_choose_expr((tid) != SSAM_ANY_TID, (tid), 0), \ - .instance = __builtin_choose_expr((iid) != SSAM_ANY_IID, (iid), 0), \ - .function = __builtin_choose_expr((fun) != SSAM_ANY_FUN, (fun), 0) + .target = __builtin_choose_expr((tid) != SSAM_SSH_TID_ANY, (tid), 0), \ + .instance = __builtin_choose_expr((iid) != SSAM_SSH_IID_ANY, (iid), 0), \ + .function = __builtin_choose_expr((fun) != SSAM_SSH_FUN_ANY, (fun), 0) /** * SSAM_VDEV() - Initialize a &struct ssam_device_id as virtual device with @@ -113,18 +113,18 @@ struct ssam_device_uid { * * Initializes a &struct ssam_device_id with the given parameters in the * virtual domain. See &struct ssam_device_uid for details regarding the - * parameters. The special values %SSAM_ANY_TID, %SSAM_ANY_IID, and - * %SSAM_ANY_FUN can be used to specify that matching should ignore target ID, + * parameters. The special values %SSAM_SSH_TID_ANY, %SSAM_SSH_IID_ANY, and + * %SSAM_SSH_FUN_ANY can be used to specify that matching should ignore target ID, * instance ID, and/or sub-function, respectively. This macro initializes the * ``match_flags`` field based on the given parameters. * * Note: The parameter @cat must be a valid &u8 value, the parameters @tid, - * @iid, and @fun must be either valid &u8 values or %SSAM_ANY_TID, - * %SSAM_ANY_IID, or %SSAM_ANY_FUN, respectively. Other non-&u8 values are not + * @iid, and @fun must be either valid &u8 values or %SSAM_SSH_TID_ANY, + * %SSAM_SSH_IID_ANY, or %SSAM_SSH_FUN_ANY, respectively. Other non-&u8 values are not * allowed. */ #define SSAM_VDEV(cat, tid, iid, fun) \ - SSAM_DEVICE(SSAM_DOMAIN_VIRTUAL, SSAM_VIRTUAL_TC_##cat, tid, iid, fun) + SSAM_DEVICE(SSAM_DOMAIN_VIRTUAL, SSAM_VIRTUAL_TC_##cat, SSAM_SSH_TID_##tid, iid, fun) /** * SSAM_SDEV() - Initialize a &struct ssam_device_id as physical SSH device @@ -136,18 +136,18 @@ struct ssam_device_uid { * * Initializes a &struct ssam_device_id with the given parameters in the SSH * domain. See &struct ssam_device_uid for details regarding the parameters. - * The special values %SSAM_ANY_TID, %SSAM_ANY_IID, and %SSAM_ANY_FUN can be - * used to specify that matching should ignore target ID, instance ID, and/or - * sub-function, respectively. This macro initializes the ``match_flags`` - * field based on the given parameters. + * The special values %SSAM_SSH_TID_ANY, %SSAM_SSH_IID_ANY, and + * %SSAM_SSH_FUN_ANY can be used to specify that matching should ignore target + * ID, instance ID, and/or sub-function, respectively. This macro initializes + * the ``match_flags`` field based on the given parameters. * * Note: The parameter @cat must be a valid &u8 value, the parameters @tid, - * @iid, and @fun must be either valid &u8 values or %SSAM_ANY_TID, - * %SSAM_ANY_IID, or %SSAM_ANY_FUN, respectively. Other non-&u8 values are not - * allowed. + * @iid, and @fun must be either valid &u8 values or %SSAM_SSH_TID_ANY, + * %SSAM_SSH_IID_ANY, or %SSAM_SSH_FUN_ANY, respectively. Other non-&u8 values + * are not allowed. */ #define SSAM_SDEV(cat, tid, iid, fun) \ - SSAM_DEVICE(SSAM_DOMAIN_SERIALHUB, SSAM_SSH_TC_##cat, tid, iid, fun) + SSAM_DEVICE(SSAM_DOMAIN_SERIALHUB, SSAM_SSH_TC_##cat, SSAM_SSH_TID_##tid, iid, fun) /* * enum ssam_device_flags - Flags for SSAM client devices. -- cgit v1.2.3 From b09ee1cd59918bcf1a6793b663034b6e345b3ced Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 20 Dec 2022 18:56:08 +0100 Subject: platform/surface: aggregator: Rename top-level request functions to avoid ambiguities We currently have a struct ssam_request_sync and a function ssam_request_sync(). While this is valid C, there are some downsides to it. One of these is that current Sphinx versions (>= 3.0) cannot disambiguate between the two (see disucssion and pull request linked below). It instead emits a "WARNING: Duplicate C declaration" and links for the struct and function in the resulting documentation link to the same entry (i.e. both to either function or struct documentation) instead of their respective own entries. While we could just ignore that and wait for a fix, there's also a point to be made that the current naming can be somewhat confusing when searching (e.g. via grep) or trying to understand the levels of abstraction at play: We currently have struct ssam_request_sync and associated functions ssam_request_sync_[alloc|free|init|wait|...]() operating on this struct. However, function ssam_request_sync() is one abstraction level above this. Similarly, ssam_request_sync_with_buffer() is not a function operating on struct ssam_request_sync, but rather a sibling to ssam_request_sync(), both using the struct under the hood. Therefore, rename the top level request functions: ssam_request_sync() -> ssam_request_do_sync() ssam_request_sync_with_buffer() -> ssam_request_do_sync_with_buffer() ssam_request_sync_onstack() -> ssam_request_do_sync_onstack() Link: https://lore.kernel.org/all/085e0ada65c11da9303d07e70c510dc45f21315b.1656756450.git.mchehab@kernel.org/ Link: https://github.com/sphinx-doc/sphinx/pull/8313 Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221220175608.1436273-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/controller.h | 56 +++++++++++++-------------- include/linux/surface_aggregator/device.h | 8 ++-- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index 8932bc0bae18..cb7980805920 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -207,17 +207,17 @@ static inline int ssam_request_sync_wait(struct ssam_request_sync *rqst) return rqst->status; } -int ssam_request_sync(struct ssam_controller *ctrl, - const struct ssam_request *spec, - struct ssam_response *rsp); +int ssam_request_do_sync(struct ssam_controller *ctrl, + const struct ssam_request *spec, + struct ssam_response *rsp); -int ssam_request_sync_with_buffer(struct ssam_controller *ctrl, - const struct ssam_request *spec, - struct ssam_response *rsp, - struct ssam_span *buf); +int ssam_request_do_sync_with_buffer(struct ssam_controller *ctrl, + const struct ssam_request *spec, + struct ssam_response *rsp, + struct ssam_span *buf); /** - * ssam_request_sync_onstack - Execute a synchronous request on the stack. + * ssam_request_do_sync_onstack - Execute a synchronous request on the stack. * @ctrl: The controller via which the request is submitted. * @rqst: The request specification. * @rsp: The response buffer. @@ -227,7 +227,7 @@ int ssam_request_sync_with_buffer(struct ssam_controller *ctrl, * fully initializes it via the provided request specification, submits it, * and finally waits for its completion before returning its status. This * helper macro essentially allocates the request message buffer on the stack - * and then calls ssam_request_sync_with_buffer(). + * and then calls ssam_request_do_sync_with_buffer(). * * Note: The @payload_len parameter specifies the maximum payload length, used * for buffer allocation. The actual payload length may be smaller. @@ -235,12 +235,12 @@ int ssam_request_sync_with_buffer(struct ssam_controller *ctrl, * Return: Returns the status of the request or any failure during setup, i.e. * zero on success and a negative value on failure. */ -#define ssam_request_sync_onstack(ctrl, rqst, rsp, payload_len) \ +#define ssam_request_do_sync_onstack(ctrl, rqst, rsp, payload_len) \ ({ \ u8 __data[SSH_COMMAND_MESSAGE_LENGTH(payload_len)]; \ struct ssam_span __buf = { &__data[0], ARRAY_SIZE(__data) }; \ \ - ssam_request_sync_with_buffer(ctrl, rqst, rsp, &__buf); \ + ssam_request_do_sync_with_buffer(ctrl, rqst, rsp, &__buf); \ }) /** @@ -349,7 +349,7 @@ struct ssam_request_spec_md { * zero on success and negative on failure. The ``ctrl`` parameter is the * controller via which the request is being sent. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_N(name, spec...) \ @@ -366,7 +366,7 @@ struct ssam_request_spec_md { rqst.length = 0; \ rqst.payload = NULL; \ \ - return ssam_request_sync_onstack(ctrl, &rqst, NULL, 0); \ + return ssam_request_do_sync_onstack(ctrl, &rqst, NULL, 0); \ } /** @@ -389,7 +389,7 @@ struct ssam_request_spec_md { * parameter is the controller via which the request is sent. The request * argument is specified via the ``arg`` pointer. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_W(name, atype, spec...) \ @@ -406,8 +406,8 @@ struct ssam_request_spec_md { rqst.length = sizeof(atype); \ rqst.payload = (u8 *)arg; \ \ - return ssam_request_sync_onstack(ctrl, &rqst, NULL, \ - sizeof(atype)); \ + return ssam_request_do_sync_onstack(ctrl, &rqst, NULL, \ + sizeof(atype)); \ } /** @@ -430,7 +430,7 @@ struct ssam_request_spec_md { * the controller via which the request is sent. The request's return value is * written to the memory pointed to by the ``ret`` parameter. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_R(name, rtype, spec...) \ @@ -453,7 +453,7 @@ struct ssam_request_spec_md { rsp.length = 0; \ rsp.pointer = (u8 *)ret; \ \ - status = ssam_request_sync_onstack(ctrl, &rqst, &rsp, 0); \ + status = ssam_request_do_sync_onstack(ctrl, &rqst, &rsp, 0); \ if (status) \ return status; \ \ @@ -491,7 +491,7 @@ struct ssam_request_spec_md { * request argument is specified via the ``arg`` pointer. The request's return * value is written to the memory pointed to by the ``ret`` parameter. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_WR(name, atype, rtype, spec...) \ @@ -514,7 +514,7 @@ struct ssam_request_spec_md { rsp.length = 0; \ rsp.pointer = (u8 *)ret; \ \ - status = ssam_request_sync_onstack(ctrl, &rqst, &rsp, sizeof(atype)); \ + status = ssam_request_do_sync_onstack(ctrl, &rqst, &rsp, sizeof(atype)); \ if (status) \ return status; \ \ @@ -550,7 +550,7 @@ struct ssam_request_spec_md { * parameter is the controller via which the request is sent, ``tid`` the * target ID for the request, and ``iid`` the instance ID. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_N(name, spec...) \ @@ -567,7 +567,7 @@ struct ssam_request_spec_md { rqst.length = 0; \ rqst.payload = NULL; \ \ - return ssam_request_sync_onstack(ctrl, &rqst, NULL, 0); \ + return ssam_request_do_sync_onstack(ctrl, &rqst, NULL, 0); \ } /** @@ -592,7 +592,7 @@ struct ssam_request_spec_md { * ``tid`` the target ID for the request, and ``iid`` the instance ID. The * request argument is specified via the ``arg`` pointer. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_W(name, atype, spec...) \ @@ -609,7 +609,7 @@ struct ssam_request_spec_md { rqst.length = sizeof(atype); \ rqst.payload = (u8 *)arg; \ \ - return ssam_request_sync_onstack(ctrl, &rqst, NULL, \ + return ssam_request_do_sync_onstack(ctrl, &rqst, NULL, \ sizeof(atype)); \ } @@ -635,7 +635,7 @@ struct ssam_request_spec_md { * the target ID for the request, and ``iid`` the instance ID. The request's * return value is written to the memory pointed to by the ``ret`` parameter. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_R(name, rtype, spec...) \ @@ -658,7 +658,7 @@ struct ssam_request_spec_md { rsp.length = 0; \ rsp.pointer = (u8 *)ret; \ \ - status = ssam_request_sync_onstack(ctrl, &rqst, &rsp, 0); \ + status = ssam_request_do_sync_onstack(ctrl, &rqst, &rsp, 0); \ if (status) \ return status; \ \ @@ -698,7 +698,7 @@ struct ssam_request_spec_md { * The request argument is specified via the ``arg`` pointer. The request's * return value is written to the memory pointed to by the ``ret`` parameter. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_WR(name, atype, rtype, spec...) \ @@ -722,7 +722,7 @@ struct ssam_request_spec_md { rsp.length = 0; \ rsp.pointer = (u8 *)ret; \ \ - status = ssam_request_sync_onstack(ctrl, &rqst, &rsp, sizeof(atype)); \ + status = ssam_request_do_sync_onstack(ctrl, &rqst, &rsp, sizeof(atype)); \ if (status) \ return status; \ \ diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 4da20b7a0ee5..1545e5567b15 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -456,7 +456,7 @@ static inline int ssam_device_register_clients(struct ssam_device *sdev) * device of the request and by association the controller via which the * request is sent. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_CL_N(name, spec...) \ @@ -490,7 +490,7 @@ static inline int ssam_device_register_clients(struct ssam_device *sdev) * which the request is sent. The request's argument is specified via the * ``arg`` pointer. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_CL_W(name, atype, spec...) \ @@ -524,7 +524,7 @@ static inline int ssam_device_register_clients(struct ssam_device *sdev) * the request is sent. The request's return value is written to the memory * pointed to by the ``ret`` parameter. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_CL_R(name, rtype, spec...) \ @@ -560,7 +560,7 @@ static inline int ssam_device_register_clients(struct ssam_device *sdev) * specified via the ``arg`` pointer. The request's return value is written to * the memory pointed to by the ``ret`` parameter. * - * Refer to ssam_request_sync_onstack() for more details on the behavior of + * Refer to ssam_request_do_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_CL_WR(name, atype, rtype, spec...) \ -- cgit v1.2.3 From 608723c41cd951fb32ade2f8371e61c270816175 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 1 Feb 2023 16:08:07 +0100 Subject: rcu/kvfree: Add kvfree_rcu_mightsleep() and kfree_rcu_mightsleep() The kvfree_rcu() and kfree_rcu() APIs are hazardous in that if you forget the second argument, it works, but might sleep. This sleeping can be a correctness bug from atomic contexts, and even in non-atomic contexts it might introduce unacceptable latencies. This commit therefore adds kvfree_rcu_mightsleep() and kfree_rcu_mightsleep(), which will replace the single-argument kvfree_rcu() and kfree_rcu(), respectively. This commit enables a series of commits that switch from single-argument kvfree_rcu() and kfree_rcu() to their _mightsleep() counterparts. Once all of these commits land, the single-argument versions will be removed. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f38d4469d7f3..84433600885a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1004,6 +1004,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \ kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__) +#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) +#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr) + #define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ -- cgit v1.2.3 From 158e5e9eeaa0d7a86f2278313746ef6c8521790d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 2 Feb 2023 15:19:21 +0100 Subject: bpf: Drop always true do_idr_lock parameter to bpf_map_free_id The do_idr_lock parameter to bpf_map_free_id was introduced by commit bd5f5f4ecb78 ("bpf: Add BPF_MAP_GET_FD_BY_ID"). However, all callers set do_idr_lock = true since commit 1e0bd5a091e5 ("bpf: Switch bpf_map ref counter to atomic64_t so bpf_map_inc() never fails"). While at it also inline __bpf_map_put into its only caller bpf_map_put now that do_idr_lock can be dropped from its signature. Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20230202141921.4424-1-tklauser@distanz.ch Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e11db75094d0..35c18a98c21a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1846,7 +1846,7 @@ struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog); -void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +void bpf_map_free_id(struct bpf_map *map); struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, enum btf_field_type type); -- cgit v1.2.3 From d3d854fd6a1d97157f790604e07f6386e8df8fe4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Feb 2023 11:24:17 +0100 Subject: netdev-genl: create a simple family for netdev stuff Add a Netlink spec-compatible family for netdevs. This is a very simple implementation without much thought going into it. It allows us to reap all the benefits of Netlink specs, one can use the generic client to issue the commands: $ ./cli.py --spec netdev.yaml --dump dev_get [{'ifindex': 1, 'xdp-features': set()}, {'ifindex': 2, 'xdp-features': {'basic', 'ndo-xmit', 'redirect'}}, {'ifindex': 3, 'xdp-features': {'rx-sg'}}] the generic python library does not have flags-by-name support, yet, but we also don't have to carry strings in the messages, as user space can get the names from the spec. Acked-by: Jesper Dangaard Brouer Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Marek Majtyka Signed-off-by: Marek Majtyka Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/327ad9c9868becbe1e601b580c962549c8cd81f2.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2466afa25078..0f7967591288 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -2055,6 +2056,7 @@ struct net_device { /* Read-mostly cache-line for fast-path access */ unsigned int flags; + xdp_features_t xdp_features; unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; @@ -2839,6 +2841,7 @@ enum netdev_cmd { NETDEV_OFFLOAD_XSTATS_DISABLE, NETDEV_OFFLOAD_XSTATS_REPORT_USED, NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, + NETDEV_XDP_FEAT_CHANGE, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); -- cgit v1.2.3 From 1bd9a7b4afd5e0b938868a90b16d514c19808e6c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jan 2023 19:37:20 +0100 Subject: phy: Remove unused phy_optional_get() There were never any upstream users of this function since its introduction almost 10 years ago. Besides, the dummy for phy_optional_get() should have returned NULL instead of an error code. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/df61992b1d66bccf4e6e1eafae94a7f7d7629f34.1674584626.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index b1413757fcc3..1b4f9be21e01 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -250,7 +250,6 @@ static inline void phy_set_bus_width(struct phy *phy, int bus_width) phy->attrs.bus_width = bus_width; } struct phy *phy_get(struct device *dev, const char *string); -struct phy *phy_optional_get(struct device *dev, const char *string); struct phy *devm_phy_get(struct device *dev, const char *string); struct phy *devm_phy_optional_get(struct device *dev, const char *string); struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, @@ -426,12 +425,6 @@ static inline struct phy *phy_get(struct device *dev, const char *string) return ERR_PTR(-ENOSYS); } -static inline struct phy *phy_optional_get(struct device *dev, - const char *string) -{ - return ERR_PTR(-ENOSYS); -} - static inline struct phy *devm_phy_get(struct device *dev, const char *string) { return ERR_PTR(-ENOSYS); -- cgit v1.2.3 From d02aa181ee595c81738b6bd7ebad6025fbee035a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jan 2023 19:37:22 +0100 Subject: phy: Add devm_of_phy_optional_get() helper Add an optional variant of devm_of_phy_get() that also takes care of printing real errors, so drivers no longer have to open-code this operation. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/4cd0069bcff424ffc5c3a102397c02370b91985b.1674584626.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 1b4f9be21e01..3a570bc59fc7 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -254,6 +254,8 @@ struct phy *devm_phy_get(struct device *dev, const char *string); struct phy *devm_phy_optional_get(struct device *dev, const char *string); struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, const char *con_id); +struct phy *devm_of_phy_optional_get(struct device *dev, struct device_node *np, + const char *con_id); struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, int index); void of_phy_put(struct phy *phy); @@ -443,6 +445,13 @@ static inline struct phy *devm_of_phy_get(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline struct phy *devm_of_phy_optional_get(struct device *dev, + struct device_node *np, + const char *con_id) +{ + return NULL; +} + static inline struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, int index) -- cgit v1.2.3 From f5b3c341a46ec55d93332ee5c254a278af902ffe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 2 Feb 2023 16:54:12 +0200 Subject: mei: Move uuid_le_cmp() to its only user There is only a single user of uuid_le_cmp() API, let's make it private to that user. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230202145412.87569-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uuid.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 5be158a49e11..6b1a3efa1e0b 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -110,9 +110,4 @@ int uuid_parse(const char *uuid, uuid_t *u); /* MEI UUID type, don't use anywhere else */ #include -static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_le)); -} - #endif -- cgit v1.2.3 From fb6f026b833a71f4701e12b43800e46d7351f7a2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 10 Jan 2023 19:03:53 +0000 Subject: mm/damon/core: update kernel-doc comments for DAMOS action supports of each DAMON operations set Patch series "mm/damon: trivial fixups". This patchset contains patches for trivial fixups of DAMON's documentation, MAINTAINERS section, and selftests. This patch (of 8): Supports of each DAMOS action are up to DAMON operations set implementation in use, but not well mentioned on the kernel-doc comments. Add the comment. Link: https://lkml.kernel.org/r/20230110190400.119388-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230110190400.119388-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 7907918ad2e0..3fa96d7c9fe4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -91,6 +91,12 @@ struct damon_target { * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. * @DAMOS_STAT: Do nothing but count the stat. * @NR_DAMOS_ACTIONS: Total number of DAMOS actions + * + * The support of each action is up to running &struct damon_operations. + * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR supports all actions except + * &enum DAMOS_LRU_PRIO and &enum DAMOS_LRU_DEPRIO. &enum DAMON_OPS_PADDR + * supports only &enum DAMOS_PAGEOUT, &enum DAMOS_LRU_PRIO, &enum + * DAMOS_LRU_DEPRIO, and &DAMOS_STAT. */ enum damos_action { DAMOS_WILLNEED, -- cgit v1.2.3 From 55901e89d2864b5ef9961892470eedf29279d412 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 10 Jan 2023 19:03:54 +0000 Subject: mm/damon/core: update kernel-doc comments for DAMOS filters supports of each DAMON operations set Supports of each DAMOS filter type are up to DAMON operations set implementation in use, but not well mentioned on the kernel-doc comments. Add the comment. Link: https://lkml.kernel.org/r/20230110190400.119388-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 3fa96d7c9fe4..dfb245bb3053 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -227,6 +227,11 @@ struct damos_stat { * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @NR_DAMOS_FILTER_TYPES: Number of filter types. + * + * The support of each filter type is up to running &struct damon_operations. + * &enum DAMON_OPS_PADDR is supporting all filter types, while + * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR are not supporting any + * filter types. */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, -- cgit v1.2.3 From c5d5546ea06512accc894cd19265c7041a6ac81a Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Tue, 10 Jan 2023 23:42:11 +0800 Subject: maple_tree: remove the parameter entry of mas_preallocate The parameter entry of mas_preallocate is not used, so drop it. Link: https://lkml.kernel.org/r/20230110154211.1758562-1-vernon2gm@gmail.com Signed-off-by: Vernon Yang Cc: Liam Howlett Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 815a27661517..a7bf58fd7cc6 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -455,7 +455,7 @@ int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); -int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_preallocate(struct ma_state *mas, gfp_t gfp); bool mas_is_err(struct ma_state *mas); bool mas_nomem(struct ma_state *mas, gfp_t gfp); -- cgit v1.2.3 From 7d4a8be0c4b2b7ffb367929d2b352651f083806b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 10 Jan 2023 13:57:22 +1100 Subject: mm/mmu_notifier: remove unused mmu_notifier_range_update_to_read_only export mmu_notifier_range_update_to_read_only() was originally introduced in commit c6d23413f81b ("mm/mmu_notifier: mmu_notifier_range_update_to_read_only() helper") as an optimisation for device drivers that know a range has only been mapped read-only. However there are no users of this feature so remove it. As it is the only user of the struct mmu_notifier_range.vma field remove that also. Link: https://lkml.kernel.org/r/20230110025722.600912-1-apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: Mike Rapoport (IBM) Reviewed-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: Mike Kravetz Cc: Ira Weiny Cc: Jerome Glisse Cc: John Hubbard Cc: Ralph Campbell Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index d6c06e140277..64a3e051c3c4 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -269,7 +269,6 @@ extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; #endif struct mmu_notifier_range { - struct vm_area_struct *vma; struct mm_struct *mm; unsigned long start; unsigned long end; @@ -514,12 +513,10 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned flags, - struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end) { - range->vma = vma; range->event = event; range->mm = mm; range->start = start; @@ -530,10 +527,10 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, static inline void mmu_notifier_range_init_owner( struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned int flags, - struct vm_area_struct *vma, struct mm_struct *mm, - unsigned long start, unsigned long end, void *owner) + struct mm_struct *mm, unsigned long start, + unsigned long end, void *owner) { - mmu_notifier_range_init(range, event, flags, vma, mm, start, end); + mmu_notifier_range_init(range, event, flags, mm, start, end); range->owner = owner; } @@ -659,9 +656,9 @@ static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, range->end = end; } -#define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \ +#define mmu_notifier_range_init(range,event,flags,mm,start,end) \ _mmu_notifier_range_init(range, start, end) -#define mmu_notifier_range_init_owner(range, event, flags, vma, mm, start, \ +#define mmu_notifier_range_init_owner(range, event, flags, mm, start, \ end, owner) \ _mmu_notifier_range_init(range, start, end) -- cgit v1.2.3 From 94688e8eb453e616098cb930e5f6fed4a6ea2dfa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:47 +0000 Subject: mm: remove folio_pincount_ptr() and head_compound_pincount() We can use folio->_pincount directly, since all users are guarded by tests of compound/large. Link: https://lkml.kernel.org/r/20230111142915.1001531-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 ++------------ include/linux/mm_types.h | 5 ----- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 76c97cb8ee9a..6d3945207067 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,6 @@ static inline void folio_set_compound_dtor(struct folio *folio, void destroy_large_folio(struct folio *folio); -static inline int head_compound_pincount(struct page *head) -{ - return atomic_read(compound_pincount_ptr(head)); -} - static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; @@ -1641,11 +1636,6 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } -static inline atomic_t *folio_pincount_ptr(struct folio *folio) -{ - return &folio_page(folio, 1)->compound_pincount; -} - /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. @@ -1663,7 +1653,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) * expected to be able to deal gracefully with a false positive. * * For large folios, the result will be exactly correct. That's because - * we have more tracking data available: the compound_pincount is used + * we have more tracking data available: the _pincount field is used * instead of the GUP_PIN_COUNTING_BIAS scheme. * * For more information, please see Documentation/core-api/pin_user_pages.rst. @@ -1674,7 +1664,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) static inline bool folio_maybe_dma_pinned(struct folio *folio) { if (folio_test_large(folio)) - return atomic_read(folio_pincount_ptr(folio)) > 0; + return atomic_read(&folio->_pincount) > 0; /* * folio_ref_count() is signed. If that refcount overflows, then diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10b6eb311ede..6ff1d7db00a7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -443,11 +443,6 @@ static inline atomic_t *subpages_mapcount_ptr(struct page *page) return &page[1].subpages_mapcount; } -static inline atomic_t *compound_pincount_ptr(struct page *page) -{ - return &page[1].compound_pincount; -} - /* * Used for sizing the vmemmap region on some architectures */ -- cgit v1.2.3 From eec20426d48bd7b63c69969a793943ed1a99b731 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:48 +0000 Subject: mm: convert head_subpages_mapcount() into folio_nr_pages_mapped() Calling this 'mapcount' is confusing since mapcount is usually the number of times something is mapped; instead this is the number of mapped pages. It's also better to enforce that this is a folio rather than a head page. Move folio_nr_pages_mapped() into mm/internal.h since this is not something we want device drivers or filesystems poking at. Get rid of folio_subpages_mapcount_ptr() and use folio->_nr_pages_mapped directly. Link: https://lkml.kernel.org/r/20230111142915.1001531-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 22 ++-------------------- include/linux/mm_types.h | 12 +++--------- 2 files changed, 5 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6d3945207067..2bdd08a5b8b4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -843,24 +843,6 @@ static inline int head_compound_mapcount(struct page *head) return atomic_read(compound_mapcount_ptr(head)) + 1; } -/* - * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages, - * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit - * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently - * leaves subpages_mapcount at 0, but avoid surprise if it participates later. - */ -#define COMPOUND_MAPPED 0x800000 -#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1) - -/* - * Number of sub-pages mapped by PTE, does not include compound mapcount. - * Must be called only on head of compound page. - */ -static inline int head_subpages_mapcount(struct page *head) -{ - return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED; -} - /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -920,9 +902,9 @@ static inline bool folio_large_is_mapped(struct folio *folio) { /* * Reading folio_mapcount_ptr() below could be omitted if hugetlb - * participated in incrementing subpages_mapcount when compound mapped. + * participated in incrementing nr_pages_mapped when compound mapped. */ - return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 || + return atomic_read(&folio->_nr_pages_mapped) > 0 || atomic_read(folio_mapcount_ptr(folio)) >= 0; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6ff1d7db00a7..4751c67b98a6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -307,7 +307,7 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). - * @_subpages_mapcount: Do not use directly, call folio_mapcount(). + * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * @_flags_2: For alignment. Do not use. @@ -361,7 +361,7 @@ struct folio { unsigned char _folio_dtor; unsigned char _folio_order; atomic_t _compound_mapcount; - atomic_t _subpages_mapcount; + atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; @@ -404,7 +404,7 @@ FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); FOLIO_MATCH(compound_mapcount, _compound_mapcount); -FOLIO_MATCH(subpages_mapcount, _subpages_mapcount); +FOLIO_MATCH(subpages_mapcount, _nr_pages_mapped); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); @@ -427,12 +427,6 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio) return &tail->compound_mapcount; } -static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio) -{ - struct page *tail = &folio->page + 1; - return &tail->subpages_mapcount; -} - static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; -- cgit v1.2.3 From b14224fbea62e5bffd680613376fe1268f4103ba Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:50 +0000 Subject: mm: convert total_compound_mapcount() to folio_total_mapcount() Instead of enforcing that the argument must be a head page by naming, enforce it with the compiler by making it a folio. Also rename the counter in struct folio from _compound_mapcount to _entire_mapcount. Link: https://lkml.kernel.org/r/20230111142915.1001531-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- include/linux/mm_types.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2bdd08a5b8b4..bdf83e75bcd6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -871,7 +871,7 @@ static inline int page_mapcount(struct page *page) return head_compound_mapcount(page) + mapcount; } -int total_compound_mapcount(struct page *head); +int folio_total_mapcount(struct folio *folio); /** * folio_mapcount() - Calculate the number of mappings of this folio. @@ -888,14 +888,14 @@ static inline int folio_mapcount(struct folio *folio) { if (likely(!folio_test_large(folio))) return atomic_read(&folio->_mapcount) + 1; - return total_compound_mapcount(&folio->page); + return folio_total_mapcount(folio); } static inline int total_mapcount(struct page *page) { if (likely(!PageCompound(page))) return atomic_read(&page->_mapcount) + 1; - return total_compound_mapcount(compound_head(page)); + return folio_total_mapcount(page_folio(page)); } static inline bool folio_large_is_mapped(struct folio *folio) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4751c67b98a6..70cbda768308 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -306,7 +306,7 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). - * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). @@ -360,7 +360,7 @@ struct folio { unsigned long _head_1; unsigned char _folio_dtor; unsigned char _folio_order; - atomic_t _compound_mapcount; + atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT @@ -403,7 +403,7 @@ FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _compound_mapcount); +FOLIO_MATCH(compound_mapcount, _entire_mapcount); FOLIO_MATCH(subpages_mapcount, _nr_pages_mapped); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT -- cgit v1.2.3 From 4d510f3da4c216d4c2695395f67aec38e2aa6cc7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:54 +0000 Subject: mm: add folio_add_new_anon_rmap() In contrast to other rmap functions, page_add_new_anon_rmap() is always called with a freshly allocated page. That means it can't be called with a tail page. Turn page_add_new_anon_rmap() into folio_add_new_anon_rmap() and add a page_add_new_anon_rmap() wrapper. Callers can be converted individually. [akpm@linux-foundation.org: fix NOMMU build. page_add_new_anon_rmap() requires CONFIG_MMU] [willy@infradead.org: folio-compat.c needs rmap.h] Link: https://lkml.kernel.org/r/20230111142915.1001531-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bd3504d11b15..aa682a2a93ce 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -194,6 +194,8 @@ void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address); +void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, + unsigned long address); void page_add_file_rmap(struct page *, struct vm_area_struct *, bool compound); void page_remove_rmap(struct page *, struct vm_area_struct *, -- cgit v1.2.3 From c7f84b5723f1a60becd79d895ab214a7d5ee93c1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:57 +0000 Subject: mm: use entire_mapcount in __page_dup_rmap() Remove the use of the compound_mapcount_ptr() wrapper, and add an assertion that we're not passing a tail page if we're duplicating a PMD. Link: https://lkml.kernel.org/r/20230111142915.1001531-12-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/rmap.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index aa682a2a93ce..a6bd1f0a183d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -208,7 +208,14 @@ void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, static inline void __page_dup_rmap(struct page *page, bool compound) { - atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount); + if (compound) { + struct folio *folio = (struct folio *)page; + + VM_BUG_ON_PAGE(compound && !PageHead(page), page); + atomic_inc(&folio->_entire_mapcount); + } else { + atomic_inc(&page->_mapcount); + } } static inline void page_dup_file_rmap(struct page *page, bool compound) -- cgit v1.2.3 From c97eeb8f260dba098ba775e37d216f81f28559a9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:00 +0000 Subject: mm: convert page_mapcount() to use folio_entire_mapcount() Remove a use of head_compound_mapcount(). Link: https://lkml.kernel.org/r/20230111142915.1001531-15-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bdf83e75bcd6..a6afa6c51a4d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -853,22 +853,26 @@ static inline void page_mapcount_reset(struct page *page) atomic_set(&(page)->_mapcount, -1); } -/* - * Mapcount of 0-order page; when compound sub-page, includes - * compound_mapcount of compound_head of page. +/** + * page_mapcount() - Number of times this precise page is mapped. + * @page: The page. + * + * The number of times this page is mapped. If this page is part of + * a large folio, it includes the number of times this page is mapped + * as part of that folio. * - * Result is undefined for pages which cannot be mapped into userspace. + * The result is undefined for pages which cannot be mapped into userspace. * For example SLAB or special types of pages. See function page_has_type(). - * They use this place in struct page differently. + * They use this field in struct page differently. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; - if (likely(!PageCompound(page))) - return mapcount; - page = compound_head(page); - return head_compound_mapcount(page) + mapcount; + if (unlikely(PageCompound(page))) + mapcount += folio_entire_mapcount(page_folio(page)); + + return mapcount; } int folio_total_mapcount(struct folio *folio); -- cgit v1.2.3 From 1aa4d03b60c0f61a8d96d5d633bf7968dbf6841f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:01 +0000 Subject: mm: remove head_compound_mapcount() and _ptr functions folio_mapcount_ptr(), compound_mapcount_ptr() and subpages_mapcount_ptr() are all now unused. Link: https://lkml.kernel.org/r/20230111142915.1001531-16-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++------------ include/linux/mm_types.h | 16 ---------------- 2 files changed, 3 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a6afa6c51a4d..7ff6e2410aa3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -831,16 +831,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline int folio_entire_mapcount(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); - return atomic_read(folio_mapcount_ptr(folio)) + 1; -} - -/* - * Mapcount of compound page as a whole, does not include mapped sub-pages. - * Must be called only on head of compound page. - */ -static inline int head_compound_mapcount(struct page *head) -{ - return atomic_read(compound_mapcount_ptr(head)) + 1; + return atomic_read(&folio->_entire_mapcount) + 1; } /* @@ -905,11 +896,11 @@ static inline int total_mapcount(struct page *page) static inline bool folio_large_is_mapped(struct folio *folio) { /* - * Reading folio_mapcount_ptr() below could be omitted if hugetlb + * Reading _entire_mapcount below could be omitted if hugetlb * participated in incrementing nr_pages_mapped when compound mapped. */ return atomic_read(&folio->_nr_pages_mapped) > 0 || - atomic_read(folio_mapcount_ptr(folio)) >= 0; + atomic_read(&folio->_entire_mapcount) >= 0; } /** diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 70cbda768308..ffcf21fbaaf0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -421,22 +421,6 @@ FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); #undef FOLIO_MATCH -static inline atomic_t *folio_mapcount_ptr(struct folio *folio) -{ - struct page *tail = &folio->page + 1; - return &tail->compound_mapcount; -} - -static inline atomic_t *compound_mapcount_ptr(struct page *page) -{ - return &page[1].compound_mapcount; -} - -static inline atomic_t *subpages_mapcount_ptr(struct page *page) -{ - return &page[1].subpages_mapcount; -} - /* * Used for sizing the vmemmap region on some architectures */ -- cgit v1.2.3 From 5eb5cea11dcbafaa37685bc4e89e1d4ae9c434ea Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:02 +0000 Subject: mm: reimplement compound_order() Make compound_order() use struct folio. It can't be turned into a wrapper around folio_order() as a page can be turned into a tail page between a check in compound_order() and the assertion in folio_test_large(). Link: https://lkml.kernel.org/r/20230111142915.1001531-17-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ff6e2410aa3..3adc37cebe6f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -719,11 +719,20 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +/* + * compound_order() can be called without holding a reference, which means + * that niceties like page_folio() don't work. These callers should be + * prepared to handle wild return values. For example, PG_head may be + * set before _folio_order is initialised, or this may be a tail page. + * See compaction.c for some good examples. + */ static inline unsigned int compound_order(struct page *page) { - if (!PageHead(page)) + struct folio *folio = (struct folio *)page; + + if (!test_bit(PG_head, &folio->flags)) return 0; - return page[1].compound_order; + return folio->_folio_order; } /** -- cgit v1.2.3 From 21a000fe97a018c6d25be63892afb4fd8210ab57 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:03 +0000 Subject: mm: reimplement compound_nr() Turn compound_nr() into a wrapper around folio_nr_pages(). Similarly to compound_order(), casting the struct page directly to struct folio preserves the existing behaviour, while calling page_folio() would change the behaviour. Move thp_nr_pages() down in the file so that compound_nr() can be after folio_nr_pages(). [willy@infradead.org: fix assertion triggering] Link: https://lkml.kernel.org/r/Y8AFgZEEjnUIaCbf@casper.infradead.org Link: https://lkml.kernel.org/r/20230111142915.1001531-18-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/mm.h | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3adc37cebe6f..3acf09d5b0bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,18 +1005,6 @@ static inline void set_compound_order(struct page *page, unsigned int order) #endif } -/* Returns the number of pages in this potentially compound page. */ -static inline unsigned long compound_nr(struct page *page) -{ - if (!PageHead(page)) - return 1; -#ifdef CONFIG_64BIT - return page[1].compound_nr; -#else - return 1UL << compound_order(page); -#endif -} - /* Returns the number of bytes in this potentially compound page. */ static inline unsigned long page_size(struct page *page) { @@ -1039,16 +1027,6 @@ static inline unsigned int thp_order(struct page *page) return compound_order(page); } -/** - * thp_nr_pages - The number of regular pages in this huge page. - * @page: The head page of a huge page. - */ -static inline int thp_nr_pages(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return compound_nr(page); -} - /** * thp_size - Size of a transparent huge page. * @page: Head page of a transparent huge page. @@ -1758,6 +1736,33 @@ static inline long folio_nr_pages(struct folio *folio) #endif } +/* + * compound_nr() returns the number of pages in this potentially compound + * page. compound_nr() can be called on a tail page, and is defined to + * return 1 in that case. + */ +static inline unsigned long compound_nr(struct page *page) +{ + struct folio *folio = (struct folio *)page; + + if (!test_bit(PG_head, &folio->flags)) + return 1; +#ifdef CONFIG_64BIT + return folio->_folio_nr_pages; +#else + return 1L << folio->_folio_order; +#endif +} + +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) +{ + return folio_nr_pages((struct folio *)page); +} + /** * folio_next - Move to the next physical folio. * @folio: The folio we're currently operating on. -- cgit v1.2.3 From bad6da64565846ef5ba85b0b685cfde9db0085dc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:04 +0000 Subject: mm: convert set_compound_page_dtor() and set_compound_order() to folios Replace uses of compound_dtor, compound_order and compound_nr by their folio equivalents. Link: https://lkml.kernel.org/r/20230111142915.1001531-19-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3acf09d5b0bd..836b96e08a14 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -984,8 +984,11 @@ extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS]; static inline void set_compound_page_dtor(struct page *page, enum compound_dtor_id compound_dtor) { + struct folio *folio = (struct folio *)page; + VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page); - page[1].compound_dtor = compound_dtor; + VM_BUG_ON_PAGE(!PageHead(page), page); + folio->_folio_dtor = compound_dtor; } static inline void folio_set_compound_dtor(struct folio *folio, @@ -999,9 +1002,11 @@ void destroy_large_folio(struct folio *folio); static inline void set_compound_order(struct page *page, unsigned int order) { - page[1].compound_order = order; + struct folio *folio = (struct folio *)page; + + folio->_folio_order = order; #ifdef CONFIG_64BIT - page[1].compound_nr = 1U << order; + folio->_folio_nr_pages = 1U << order; #endif } -- cgit v1.2.3 From 1c5509be58f636afabbdaf66e7436da8ec0a1828 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:08 +0000 Subject: mm: remove 'First tail page' members from struct page All former users now use the folio equivalents, so remove them from the definition of struct page. Link: https://lkml.kernel.org/r/20230111142915.1001531-23-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ffcf21fbaaf0..94b1707f5d33 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -140,16 +140,6 @@ struct page { }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ - - /* First tail page only */ - unsigned char compound_dtor; - unsigned char compound_order; - atomic_t compound_mapcount; - atomic_t subpages_mapcount; - atomic_t compound_pincount; -#ifdef CONFIG_64BIT - unsigned int compound_nr; /* 1 << compound_order */ -#endif }; struct { /* Second tail page of transparent huge page */ unsigned long _compound_pad_1; /* compound_head */ @@ -401,14 +391,6 @@ FOLIO_MATCH(memcg_data, memcg_data); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); -FOLIO_MATCH(compound_dtor, _folio_dtor); -FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _entire_mapcount); -FOLIO_MATCH(subpages_mapcount, _nr_pages_mapped); -FOLIO_MATCH(compound_pincount, _pincount); -#ifdef CONFIG_64BIT -FOLIO_MATCH(compound_nr, _folio_nr_pages); -#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ -- cgit v1.2.3 From a8d55327ccc1f999a5fba4eee67ed08bd36493ad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:09 +0000 Subject: doc: correct struct folio kernel-doc Insert appropriate public: and private: markers to make the generated kernel-doc look right. Link: https://lkml.kernel.org/r/20230111142915.1001531-24-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 94b1707f5d33..d458e9b8496c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -292,16 +292,12 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. - * @_flags_1: For large folios, additional page flags. - * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). - * @_flags_2: For alignment. Do not use. - * @_head_2: Points to the folio. Do not use. * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. @@ -348,6 +344,7 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + /* public: */ unsigned char _folio_dtor; unsigned char _folio_order; atomic_t _entire_mapcount; @@ -356,6 +353,7 @@ struct folio { #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif + /* private: the union with struct page is transitional */ }; struct page __page_1; }; @@ -363,10 +361,12 @@ struct folio { struct { unsigned long _flags_2; unsigned long _head_2; + /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; + /* private: the union with struct page is transitional */ }; struct page __page_2; }; -- cgit v1.2.3 From 4375a553f46c6cb66d1711d8f514dfdf34ce74b0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:10 +0000 Subject: mm: move page->deferred_list to folio->_deferred_list Remove the entire block of definitions for the second tail page, and add the deferred list to the struct folio. This actually moves _deferred_list to a different offset in struct folio because I don't see a need to include the padding. This lets us use list_for_each_entry_safe() in deferred_split_scan() and avoid a number of calls to compound_head(). Link: https://lkml.kernel.org/r/20230111142915.1001531-25-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 9 ++++----- include/linux/mm_types.h | 14 ++++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a1341fdcf666..aacfcb02606f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -295,11 +295,10 @@ static inline bool thp_migration_supported(void) static inline struct list_head *page_deferred_list(struct page *page) { - /* - * See organization of tail pages of compound page in - * "struct page" definition. - */ - return &page[2].deferred_list; + struct folio *folio = (struct folio *)page; + + VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); + return &folio->_deferred_list; } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d458e9b8496c..7eb4d0815a78 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -141,12 +141,6 @@ struct page { struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; - struct { /* Second tail page of transparent huge page */ - unsigned long _compound_pad_1; /* compound_head */ - unsigned long _compound_pad_2; - /* For both global and memcg */ - struct list_head deferred_list; - }; struct { /* Second tail page of hugetlb page */ unsigned long _hugetlb_pad_1; /* compound_head */ void *hugetlb_subpool; @@ -302,6 +296,7 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). + * @_deferred_list: Folios to be split under memory pressure. * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -366,6 +361,13 @@ struct folio { void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; + /* private: the union with struct page is transitional */ + }; + struct { + unsigned long _flags_2a; + unsigned long _head_2a; + /* public: */ + struct list_head _deferred_list; /* private: the union with struct page is transitional */ }; struct page __page_2; -- cgit v1.2.3 From 8991de90e99755b13026b1db32d1fa52e94c6a96 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:11 +0000 Subject: mm/huge_memory: remove page_deferred_list() Use folio->_deferred_list directly. Link: https://lkml.kernel.org/r/20230111142915.1001531-26-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index aacfcb02606f..b9978978a160 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -293,14 +293,6 @@ static inline bool thp_migration_supported(void) return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } -static inline struct list_head *page_deferred_list(struct page *page) -{ - struct folio *folio = (struct folio *)page; - - VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); - return &folio->_deferred_list; -} - #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) -- cgit v1.2.3 From f158ed6195ef949060811fd85086928470651944 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:13 +0000 Subject: mm: convert deferred_split_huge_page() to deferred_split_folio() Now that both callers use a folio, pass the folio in and save a call to compound_head(). Link: https://lkml.kernel.org/r/20230111142915.1001531-28-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b9978978a160..70bd867eba94 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -187,7 +187,7 @@ static inline int split_huge_page(struct page *page) { return split_huge_page_to_list(page, NULL); } -void deferred_split_huge_page(struct page *page); +void deferred_split_folio(struct folio *folio); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); @@ -340,7 +340,7 @@ static inline int split_huge_page(struct page *page) { return 0; } -static inline void deferred_split_huge_page(struct page *page) {} +static inline void deferred_split_folio(struct folio *folio) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) -- cgit v1.2.3 From 6a171c16e62f854e6a7e0f837dbe8f3ace0f00ce Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 11 Jan 2023 14:29:14 +0000 Subject: mm: remove the hugetlb field from struct page Patch series "Get rid of tail page fields". Continue the shrinkage of the struct page definition by getting rid of the 'first tail page' and 'second tail page' fields. I originally did this patch set before Hugh's rewrite of the subpages_mapcount, so it needed substantial updates; hope I didn't miss anything. This patch (of 28): commit dad6a5eb5556(mm,hugetlb: use folio fields in second tail page) added a transitional hugetlb field to struct page and struct folio to make room for another int in the first tail of a compound page. Hugetlb folio conversions have changed all page users of this field to use the fields within the folio so struct page no longer needs this hugetlb specific field. Link: https://lkml.kernel.org/r/20230111142915.1001531-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230111142915.1001531-29-willy@infradead.org Signed-off-by: Sidhartha Kumar Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7eb4d0815a78..452920467223 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -141,14 +141,6 @@ struct page { struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; - struct { /* Second tail page of hugetlb page */ - unsigned long _hugetlb_pad_1; /* compound_head */ - void *hugetlb_subpool; - void *hugetlb_cgroup; - void *hugetlb_cgroup_rsvd; - void *hugetlb_hwpoison; - /* No more space on 32-bit: use third tail if more */ - }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ @@ -399,10 +391,6 @@ FOLIO_MATCH(compound_head, _head_1); offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); -FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool); -FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup); -FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); -FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); #undef FOLIO_MATCH /* -- cgit v1.2.3 From 2ff6cecee669bf0fc63eadebac8cfc81f74b9a4c Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 12 Jan 2023 14:46:03 -0600 Subject: mm/memory-failure: convert hugetlb_clear_page_hwpoison to folios Change hugetlb_clear_page_hwpoison() to folio_clear_hugetlb_hwpoison() by changing the function to take in a folio. This converts one use of ClearPageHWPoison and HPageRawHwpUnreliable to their folio equivalents. Link: https://lkml.kernel.org/r/20230112204608.80136-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Acked-by: Naoya Horiguchi Cc: Matthew Wilcox Cc: Miaohe Lin Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d6413c3b8f5..cf60fe741c1d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -878,9 +878,9 @@ extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); #ifdef CONFIG_MEMORY_FAILURE -extern void hugetlb_clear_page_hwpoison(struct page *hpage); +extern void folio_clear_hugetlb_hwpoison(struct folio *folio); #else -static inline void hugetlb_clear_page_hwpoison(struct page *hpage) +static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) { } #endif -- cgit v1.2.3 From 8115612883978069fee8793f873a627ff5868718 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 12 Jan 2023 12:39:28 +0000 Subject: mm: pagevec: add folio_batch_reinit() Patch series "update mlock to use folios", v4. This series updates mlock to use folios, converting the internal interface to using folios exclusively and exposing the folio interface externally. As a product of this we move to using a folio batch rather than a pagevec for mlock folios, which brings it in line with the core folio batches contained in mm/swap.c. This patch (of 5): This performs the same task as pagevec_reinit(), only modifying a folio batch rather than a pagevec. Link: https://lkml.kernel.org/r/cover.1673526881.git.lstoakes@gmail.com Link: https://lkml.kernel.org/r/9018cecacb39e34c883540f997f9be8281153613.1673526881.git.lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Christian Brauner Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Liam R. Howlett Cc: Matthew Wilcox Cc: Mike Rapoport (IBM) Cc: William Kucharski Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 215eb6c3bdc9..2a6f61a0c10a 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -103,6 +103,11 @@ static inline void folio_batch_init(struct folio_batch *fbatch) fbatch->percpu_pvec_drained = false; } +static inline void folio_batch_reinit(struct folio_batch *fbatch) +{ + fbatch->nr = 0; +} + static inline unsigned int folio_batch_count(struct folio_batch *fbatch) { return fbatch->nr; -- cgit v1.2.3 From 950fe885a89770619e315f9b46301eebf0aab7b3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 13 Jan 2023 18:10:26 +0100 Subject: mm: remove __HAVE_ARCH_PTE_SWP_EXCLUSIVE __HAVE_ARCH_PTE_SWP_EXCLUSIVE is now supported by all architectures that support swp PTEs, so let's drop it. Link: https://lkml.kernel.org/r/20230113171026.582290-27-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 1159b25b0542..5fd45454c073 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1064,35 +1064,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define arch_start_context_switch(prev) do {} while (0) #endif -/* - * When replacing an anonymous page by a real (!non) swap entry, we clear - * PG_anon_exclusive from the page and instead remember whether the flag was - * set in the swp pte. During fork(), we have to mark the entry as !exclusive - * (possibly shared). On swapin, we use that information to restore - * PG_anon_exclusive, which is very helpful in cases where we might have - * additional (e.g., FOLL_GET) references on a page and wouldn't be able to - * detect exclusivity. - * - * These functions don't apply to non-swap entries (e.g., migration, hwpoison, - * ...). - */ -#ifndef __HAVE_ARCH_PTE_SWP_EXCLUSIVE -static inline pte_t pte_swp_mkexclusive(pte_t pte) -{ - return pte; -} - -static inline int pte_swp_exclusive(pte_t pte) -{ - return false; -} - -static inline pte_t pte_swp_clear_exclusive(pte_t pte) -{ - return pte; -} -#endif - #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) -- cgit v1.2.3 From 6189eb82f0aec8a877190bf52e629c687ed02773 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 13 Jan 2023 15:42:53 +0000 Subject: mm/page_ext: do not allocate space for page_ext->flags if not needed There is 8 byte page_ext->flags field allocated per page whenever CONFIG_PAGE_EXTENSION is enabled. However, not every user of page_ext uses flags. Therefore, check whether flags is needed at least by one user and if so allocate space for it. For example when page_table_check is enabled, on a machine with 128G of memory before the fix: [ 2.244288] allocated 536870912 bytes of page_ext after the fix: [ 2.160154] allocated 268435456 bytes of page_ext Also, add a kernel-doc comment before page_ext_operations that describes the fields, and remove check if need() is set, as that is now a required field. [pasha.tatashin@soleen.com: address comments from Mike Rapoport] Link: https://lkml.kernel.org/r/20230117202103.1412449-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20230113154253.92480-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: David Rientjes Reviewed-by: Mike Rapoport (IBM) Cc: Charan Teja Kalla Cc: Li Zhe Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page_ext.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 22be4582faae..67314f648aeb 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -7,15 +7,33 @@ #include struct pglist_data; + +/** + * struct page_ext_operations - per page_ext client operations + * @offset: Offset to the client's data within page_ext. Offset is returned to + * the client by page_ext_init. + * @size: The size of the client data within page_ext. + * @need: Function that returns true if client requires page_ext. + * @init: (optional) Called to initialize client once page_exts are allocated. + * @need_shared_flags: True when client is using shared page_ext->flags + * field. + * + * Each Page Extension client must define page_ext_operations in + * page_ext_ops array. + */ struct page_ext_operations { size_t offset; size_t size; bool (*need)(void); void (*init)(void); + bool need_shared_flags; }; #ifdef CONFIG_PAGE_EXTENSION +/* + * The page_ext_flags users must set need_shared_flags to true. + */ enum page_ext_flags { PAGE_EXT_OWNER, PAGE_EXT_OWNER_ALLOCATED, -- cgit v1.2.3 From 2973d8229b78d3f148e0c45916a1e8b237dc6167 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Jan 2023 11:12:17 +0000 Subject: mm: discard __GFP_ATOMIC __GFP_ATOMIC serves little purpose. Its main effect is to set ALLOC_HARDER which adds a few little boosts to increase the chance of an allocation succeeding, one of which is to lower the water-mark at which it will succeed. It is *always* paired with __GFP_HIGH which sets ALLOC_HIGH which also adjusts this watermark. It is probable that other users of __GFP_HIGH should benefit from the other little bonuses that __GFP_ATOMIC gets. __GFP_ATOMIC also gives a warning if used with __GFP_DIRECT_RECLAIM. There is little point to this. We already get a might_sleep() warning if __GFP_DIRECT_RECLAIM is set. __GFP_ATOMIC allows the "watermark_boost" to be side-stepped. It is probable that testing ALLOC_HARDER is a better fit here. __GFP_ATOMIC is used by tegra-smmu.c to check if the allocation might sleep. This should test __GFP_DIRECT_RECLAIM instead. This patch: - removes __GFP_ATOMIC - allows __GFP_HIGH allocations to ignore watermark boosting as well as GFP_ATOMIC requests. - makes other adjustments as suggested by the above. The net result is not change to GFP_ATOMIC allocations. Other allocations that use __GFP_HIGH will benefit from a few different extra privileges. This affects: xen, dm, md, ntfs3 the vermillion frame buffer hibernation ksm swap all of which likely produce more benefit than cost if these selected allocation are more likely to succeed quickly. [mgorman: Minor adjustments to rework on top of a series] Link: https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name Link: https://lkml.kernel.org/r/20230113111217.14134-7-mgorman@techsingularity.net Signed-off-by: NeilBrown Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Thierry Reding Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index d88c46ca82e1..5088637fe5c2 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -31,7 +31,7 @@ typedef unsigned int __bitwise gfp_t; #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u #define ___GFP_ZERO 0x100u -#define ___GFP_ATOMIC 0x200u +/* 0x200u unused */ #define ___GFP_DIRECT_RECLAIM 0x400u #define ___GFP_KSWAPD_RECLAIM 0x800u #define ___GFP_WRITE 0x1000u @@ -116,11 +116,8 @@ typedef unsigned int __bitwise gfp_t; * * %__GFP_HIGH indicates that the caller is high-priority and that granting * the request is necessary before the system can make forward progress. - * For example, creating an IO context to clean pages. - * - * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is - * high priority. Users are typically interrupt handlers. This may be - * used in conjunction with %__GFP_HIGH + * For example creating an IO context to clean pages and requests + * from atomic context. * * %__GFP_MEMALLOC allows access to all memory. This should only be used when * the caller guarantees the allocation will allow more memory to be freed @@ -135,7 +132,6 @@ typedef unsigned int __bitwise gfp_t; * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. * This takes precedence over the %__GFP_MEMALLOC flag if both are set. */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) @@ -329,7 +325,7 @@ typedef unsigned int __bitwise gfp_t; * version does not attempt reclaim/compaction at all and is by default used * in page fault path, while the non-light is used by khugepaged. */ -#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -- cgit v1.2.3 From 2cf1338454a8a9a0b3c1271ccb521afa2d6ae241 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 13 Jan 2023 11:30:11 +0900 Subject: mm: fix khugepaged with shmem_enabled=advise Pass vm_flags as a parameter to shmem_is_huge, rather than reading the flags from the vm_area_struct in question. This allows the updated flags from hugepage_madvise to be passed to the check, which is necessary because madvise does not update the vm_area_struct's flags until after hugepage_madvise returns. This fixes an issue when shmem_enabled=madvise, where MADV_HUGEPAGE on shmem was not able to register the mm_struct with khugepaged. Prior to cd89fb065099, the mm_struct was registered by MADV_HUGEPAGE regardless of the value of shmem_enabled (which was only checked when scanning vmas). Link: https://lkml.kernel.org/r/20230113023011.1784015-1-stevensd@google.com Fixes: cd89fb065099 ("mm,thp,shmem: make khugepaged obey tmpfs mount flags") Signed-off-by: David Stevens Cc: David Stevens Cc: Hugh Dickins Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d500ea967dc7..d09d54be4ffd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -92,14 +92,8 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); -extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, - pgoff_t index, bool shmem_huge_force); -static inline bool shmem_huge_enabled(struct vm_area_struct *vma, - bool shmem_huge_force) -{ - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, - shmem_huge_force); -} +extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags); extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); -- cgit v1.2.3 From ee7a5906ff08e435ed95ec9fe7c7eed2c11015d2 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 4 Jan 2023 13:14:26 -0800 Subject: pagemap: add filemap_grab_folio() Patch series "Convert to filemap_get_folios_tag()", v5. This patch series replaces find_get_pages_range_tag() with filemap_get_folios_tag(). This also allows the removal of multiple calls to compound_head() throughout. It also makes a good chunk of the straightforward conversions to folios, and takes the opportunity to introduce a function that grabs a folio from the pagecache. This patch (of 23): Add function filemap_grab_folio() to grab a folio from the page cache. This function is meant to serve as a folio replacement for grab_cache_page, and is used to facilitate the removal of find_get_pages_range_tag(). Link: https://lkml.kernel.org/r/20230104211448.4804-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20230104211448.4804-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 29e1f9e76eb6..468183be67be 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -546,6 +546,26 @@ static inline struct folio *filemap_lock_folio(struct address_space *mapping, return __filemap_get_folio(mapping, index, FGP_LOCK, 0); } +/** + * filemap_grab_folio - grab a folio from the page cache + * @mapping: The address space to search + * @index: The page index + * + * Looks up the page cache entry at @mapping & @index. If no folio is found, + * a new folio is created. The folio is locked, marked as accessed, and + * returned. + * + * Return: A found or created folio. NULL if no folio is found and failed to + * create a folio. + */ +static inline struct folio *filemap_grab_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_mask(mapping)); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search -- cgit v1.2.3 From 247f9e1feef4e57911510c8f82348efb4491ea0e Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 4 Jan 2023 13:14:27 -0800 Subject: filemap: add filemap_get_folios_tag() This is the equivalent of find_get_pages_range_tag(), except for folios instead of pages. One noteable difference is filemap_get_folios_tag() does not take in a maximum pages argument. It instead tries to fill a folio batch and stops either once full (15 folios) or reaching the end of the search range. The new function supports large folios, the initial function did not since all callers don't use large folios. Link: https://lkml.kernel.org/r/20230104211448.4804-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcow (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 468183be67be..bb3c1d51b1cb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -739,6 +739,8 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); +unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, + pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned int nr_pages, struct page **pages); -- cgit v1.2.3 From c5792d9384113de4085dfbce6940e2a853debb67 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 4 Jan 2023 13:14:48 -0800 Subject: filemap: remove find_get_pages_range_tag() All callers to find_get_pages_range_tag(), find_get_pages_tag(), pagevec_lookup_range_tag(), and pagevec_lookup_tag() have been removed. Link: https://lkml.kernel.org/r/20230104211448.4804-24-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 10 ---------- include/linux/pagevec.h | 8 -------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bb3c1d51b1cb..9f1081683771 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -741,16 +741,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); -unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, - pgoff_t end, xa_mark_t tag, unsigned int nr_pages, - struct page **pages); -static inline unsigned find_get_pages_tag(struct address_space *mapping, - pgoff_t *index, xa_mark_t tag, unsigned int nr_pages, - struct page **pages) -{ - return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag, - nr_pages, pages); -} struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 2a6f61a0c10a..f582f7213ea5 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -26,14 +26,6 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -unsigned pagevec_lookup_range_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, pgoff_t end, - xa_mark_t tag); -static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, xa_mark_t tag) -{ - return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); -} static inline void pagevec_init(struct pagevec *pvec) { -- cgit v1.2.3 From 6bc56a4d855303705802c5ede4625973637484c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Jan 2023 19:18:09 +0000 Subject: mm: add vma_alloc_zeroed_movable_folio() Replace alloc_zeroed_user_highpage_movable(). The main difference is returning a folio containing a single page instead of returning the page, but take the opportunity to rename the function to match other allocation functions a little better and rewrite the documentation to place more emphasis on the zeroing rather than the highmem aspect. Link: https://lkml.kernel.org/r/20230116191813.2145215-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Signed-off-by: Andrew Morton --- include/linux/highmem.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d7097b8158f2..e22509420ac6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -207,31 +207,30 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr) } #endif -#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE +#ifndef vma_alloc_zeroed_movable_folio /** - * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move - * @vma: The VMA the page is to be allocated for - * @vaddr: The virtual address the page will be inserted into + * vma_alloc_zeroed_movable_folio - Allocate a zeroed page for a VMA. + * @vma: The VMA the page is to be allocated for. + * @vaddr: The virtual address the page will be inserted into. * - * Returns: The allocated and zeroed HIGHMEM page + * This function will allocate a page suitable for inserting into this + * VMA at this virtual address. It may be allocated from highmem or + * the movable zone. An architecture may provide its own implementation. * - * This function will allocate a page for a VMA that the caller knows will - * be able to migrate in the future using move_pages() or reclaimed - * - * An architecture may override this function by defining - * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE and providing their own - * implementation. + * Return: A folio containing one allocated and zeroed page or NULL if + * we are out of memory. */ -static inline struct page * -alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, +static inline +struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { - struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); + struct folio *folio; - if (page) - clear_user_highpage(page, vaddr); + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false); + if (folio) + clear_user_highpage(&folio->page, vaddr); - return page; + return folio; } #endif -- cgit v1.2.3 From 9cfb816b1c6c99f4b3c1d4a0fb096162cd17ec71 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Jan 2023 19:25:06 +0000 Subject: mm/fs: convert inode_attach_wb() to take a folio Patch series "Writeback folio conversions". Remove more calls to compound_head() by passing folios around instead of pages. This patch (of 2): The only caller of inode_attach_wb() which doesn't pass NULL already has a folio, so convert the whole call-chain to take folios. Link: https://lkml.kernel.org/r/20230116192507.2146150-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230116192507.2146150-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/writeback.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 2554b71765e9..3f1491b07474 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -207,7 +207,7 @@ static inline void wait_on_inode(struct inode *inode) #include #include -void __inode_attach_wb(struct inode *inode, struct page *page); +void __inode_attach_wb(struct inode *inode, struct folio *folio); void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); @@ -222,16 +222,16 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** * inode_attach_wb - associate an inode with its wb * @inode: inode of interest - * @page: page being dirtied (may be NULL) + * @folio: folio being dirtied (may be NULL) * * If @inode doesn't have its wb, associate it with the wb matching the - * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o + * memcg of @folio or, if @folio is NULL, %current. May be called w/ or w/o * @inode->i_lock. */ -static inline void inode_attach_wb(struct inode *inode, struct page *page) +static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { if (!inode->i_wb) - __inode_attach_wb(inode, page); + __inode_attach_wb(inode, folio); } /** @@ -290,7 +290,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) #else /* CONFIG_CGROUP_WRITEBACK */ -static inline void inode_attach_wb(struct inode *inode, struct page *page) +static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { } -- cgit v1.2.3 From 75376c6fb93b99e94192cfff48222d11819ee917 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Jan 2023 19:25:07 +0000 Subject: mm: convert mem_cgroup_css_from_page() to mem_cgroup_css_from_folio() Only one caller doesn't have a folio, so move the page_folio() call to that one caller from mem_cgroup_css_from_folio(). Link: https://lkml.kernel.org/r/20230116192507.2146150-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e605fc885f08..35478695cabf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -890,7 +890,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, return match; } -struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); +struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio); ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_online(struct mem_cgroup *memcg) -- cgit v1.2.3 From 7ec7096b8577d3b899c1dae456a414f2d08c7ddb Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 17 Jan 2023 20:46:17 +0000 Subject: mm/page_ext: init page_ext early if there are no deferred struct pages page_ext must be initialized after all struct pages are initialized. Therefore, page_ext is initialized after page_alloc_init_late(), and can optionally be initialized earlier via early_page_ext kernel parameter which as a side effect also disables deferred struct pages. Allow to automatically init page_ext early when there are no deferred struct pages in order to be able to use page_ext during kernel boot and track for example page allocations early. [pasha.tatashin@soleen.com: fix build with CONFIG_PAGE_EXTENSION=n] Link: https://lkml.kernel.org/r/20230118155251.2522985-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20230117204617.1553748-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (IBM) Acked-by: Vlastimil Babka Cc: Charan Teja Kalla Cc: David Hildenbrand Cc: Li Zhe Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/page_ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 67314f648aeb..bc2e39090a1f 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -29,6 +29,8 @@ struct page_ext_operations { bool need_shared_flags; }; +extern bool deferred_struct_pages; + #ifdef CONFIG_PAGE_EXTENSION /* -- cgit v1.2.3 From 04bac040bc71b4b37550eed5854f34ca161756f9 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 18 Jan 2023 09:40:39 -0800 Subject: mm/hugetlb: convert get_hwpoison_huge_page() to folios Straightforward conversion of get_hwpoison_huge_page() to get_hwpoison_hugetlb_folio(). Reduces two references to a head page in memory-failure.c [arnd@arndb.de: fix get_hwpoison_hugetlb_folio() stub] Link: https://lkml.kernel.org/r/20230119111920.635260-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20230118174039.14247-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Signed-off-by: Arnd Bergmann Acked-by: Naoya Horiguchi Reviewed-by: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cf60fe741c1d..a51e6daacac6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -172,7 +172,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); int isolate_hugetlb(struct page *page, struct list_head *list); -int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison); +int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void putback_active_hugepage(struct page *page); @@ -418,7 +418,7 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list) return -EBUSY; } -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) +static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) { return 0; } -- cgit v1.2.3 From 5649d113ffce9f532a9ecc5ab96a93e02efbf283 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 18 Jan 2023 20:13:03 +0800 Subject: swap_state: update shadow_nodes for anonymous page Shadow_nodes is for shadow nodes reclaiming of workingset handling, it is updated when page cache add or delete since long time ago workingset only supported page cache. But when workingset supports anonymous page detection, we missied updating shadow nodes for it. This caused that shadow nodes of anonymous page will never be reclaimd by scan_shadow_nodes() even they use much memory and system memory is tense. So update shadow_nodes of anonymous page when swap cache is add or delete by calling xas_set_update(..workingset_update_node). Link: https://lkml.kernel.org/r/202301182013032211005@zte.com.cn Fixes: aae466b0052e ("mm/swap: implement workingset detection for anonymous LRU") Signed-off-by: Yang Yang Reviewed-by: Ran Xiaokai Cc: Bagas Sanjaya Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/xarray.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 44dd6d6e01bc..741703b45f61 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1643,7 +1643,8 @@ static inline void xas_set_order(struct xa_state *xas, unsigned long index, * @update: Function to call when updating a node. * * The XArray can notify a caller after it has updated an xa_node. - * This is advanced functionality and is only needed by the page cache. + * This is advanced functionality and is only needed by the page + * cache and swap cache. */ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) { -- cgit v1.2.3 From b507808ebce23561d4ff8c2aa1fb949fe402bc61 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 19 Jan 2023 16:03:43 +0000 Subject: mm: implement memory-deny-write-execute as a prctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: In-kernel support for memory-deny-write-execute (MDWE)", v2. The background to this is that systemd has a configuration option called MemoryDenyWriteExecute [2], implemented as a SECCOMP BPF filter. Its aim is to prevent a user task from inadvertently creating an executable mapping that is (or was) writeable. Since such BPF filter is stateless, it cannot detect mappings that were previously writeable but subsequently changed to read-only. Therefore the filter simply rejects any mprotect(PROT_EXEC). The side-effect is that on arm64 with BTI support (Branch Target Identification), the dynamic loader cannot change an ELF section from PROT_EXEC to PROT_EXEC|PROT_BTI using mprotect(). For libraries, it can resort to unmapping and re-mapping but for the main executable it does not have a file descriptor. The original bug report in the Red Hat bugzilla - [3] - and subsequent glibc workaround for libraries - [4]. This series adds in-kernel support for this feature as a prctl PR_SET_MDWE, that is inherited on fork(). The prctl denies PROT_WRITE | PROT_EXEC mappings. Like the systemd BPF filter it also denies adding PROT_EXEC to mappings. However unlike the BPF filter it only denies it if the mapping didn't previous have PROT_EXEC. This allows to PROT_EXEC -> PROT_EXEC | PROT_BTI with mprotect(), which is a problem with the BPF filter. This patch (of 2): The aim of such policy is to prevent a user task from creating an executable mapping that is also writeable. An example of mmap() returning -EACCESS if the policy is enabled: mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, 0, 0); Similarly, mprotect() would return -EACCESS below: addr = mmap(0, size, PROT_READ | PROT_EXEC, flags, 0, 0); mprotect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC); The BPF filter that systemd MDWE uses is stateless, and disallows mprotect() with PROT_EXEC completely. This new prctl allows PROT_EXEC to be enabled if it was already PROT_EXEC, which allows the following case: addr = mmap(0, size, PROT_READ | PROT_EXEC, flags, 0, 0); mprotect(addr, size, PROT_READ | PROT_EXEC | PROT_BTI); where PROT_BTI enables branch tracking identification on arm64. Link: https://lkml.kernel.org/r/20230119160344.54358-1-joey.gouly@arm.com Link: https://lkml.kernel.org/r/20230119160344.54358-2-joey.gouly@arm.com Signed-off-by: Joey Gouly Co-developed-by: Catalin Marinas Signed-off-by: Catalin Marinas Cc: Alexander Viro Cc: Jeremy Linton Cc: Kees Cook Cc: Lennart Poettering Cc: Mark Brown Cc: nd Cc: Shuah Khan Cc: Szabolcs Nagy Cc: Topi Miettinen Cc: Zbigniew Jędrzejewski-Szmek Cc: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mman.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/sched/coredump.h | 6 +++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index 58b3abd457a3..cee1e4b566d8 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,4 +156,38 @@ calc_vm_flag_bits(unsigned long flags) } unsigned long vm_commit_limit(void); + +/* + * Denies creating a writable executable mapping or gaining executable permissions. + * + * This denies the following: + * + * a) mmap(PROT_WRITE | PROT_EXEC) + * + * b) mmap(PROT_WRITE) + * mprotect(PROT_EXEC) + * + * c) mmap(PROT_WRITE) + * mprotect(PROT_READ) + * mprotect(PROT_EXEC) + * + * But allows the following: + * + * d) mmap(PROT_READ | PROT_EXEC) + * mmap(PROT_READ | PROT_EXEC | PROT_BTI) + */ +static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags) +{ + if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + return false; + + if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE)) + return true; + + if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC)) + return true; + + return false; +} + #endif /* _LINUX_MMAN_H */ diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 8270ad7ae14c..0e17ae7fbfd3 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -81,9 +81,13 @@ static inline int get_dumpable(struct mm_struct *mm) * lifecycle of this mm, just for simplicity. */ #define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ + +#define MMF_HAS_MDWE 28 +#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) + #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK) + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 6b3f013bb90e737b06c7955571407190b4c760ce Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Jan 2023 01:38:29 +0000 Subject: mm/damon: update comments in damon.h for damon_attrs Patch series "mm/damon: misc fixes". This patchset contains three miscellaneous simple fixes for DAMON online tuning. This patch (of 3): Commit cbeaa77b0449 ("mm/damon/core: use a dedicated struct for monitoring attributes") moved monitoring intervals from damon_ctx to a new struct, damon_attrs, but a comment in the header file has not updated for the change. Update it. Link: https://lkml.kernel.org/r/20230119013831.1911-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230119013831.1911-2-sj@kernel.org Fixes: cbeaa77b0449 ("mm/damon/core: use a dedicated struct for monitoring attributes") Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index dfb245bb3053..d5d4d19928e0 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -354,10 +354,10 @@ struct damon_ctx; * users should register the low level operations for their target address * space and usecase via the &damon_ctx.ops. Then, the monitoring thread * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting - * the monitoring, @update after each &damon_ctx.ops_update_interval, and + * the monitoring, @update after each &damon_attrs.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each - * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each - * &damon_ctx.aggr_interval. + * &damon_attrs.sample_interval. Finally, @reset_aggregated is called after + * each &damon_attrs.aggr_interval. * * Each &struct damon_operations instance having valid @id can be registered * via damon_register_ops() and selected by damon_select_ops() later. -- cgit v1.2.3 From 36c7b4db7c942ae9e1b111f0c6b468c8b2e33842 Mon Sep 17 00:00:00 2001 From: "T.J. Alumbaugh" Date: Wed, 18 Jan 2023 00:18:24 +0000 Subject: mm: multi-gen LRU: section for memcg LRU Move memcg LRU code into a dedicated section. Improve the design doc to outline its architecture. Link: https://lkml.kernel.org/r/20230118001827.1040870-5-talumbau@google.com Signed-off-by: T.J. Alumbaugh Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 17 ----------------- include/linux/mmzone.h | 13 ++----------- 2 files changed, 2 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 26dcbda07e92..de1e622dd366 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -122,18 +122,6 @@ static inline bool lru_gen_in_fault(void) return current->in_lru_fault; } -#ifdef CONFIG_MEMCG -static inline int lru_gen_memcg_seg(struct lruvec *lruvec) -{ - return READ_ONCE(lruvec->lrugen.seg); -} -#else -static inline int lru_gen_memcg_seg(struct lruvec *lruvec) -{ - return 0; -} -#endif - static inline int lru_gen_from_seq(unsigned long seq) { return seq % MAX_NR_GENS; @@ -309,11 +297,6 @@ static inline bool lru_gen_in_fault(void) return false; } -static inline int lru_gen_memcg_seg(struct lruvec *lruvec) -{ - return 0; -} - static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 815c7c2edf45..977be526c939 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -368,15 +368,6 @@ struct page_vma_mapped_walk; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) -/* see the comment on MEMCG_NR_GENS */ -enum { - MEMCG_LRU_NOP, - MEMCG_LRU_HEAD, - MEMCG_LRU_TAIL, - MEMCG_LRU_OLD, - MEMCG_LRU_YOUNG, -}; - #ifdef CONFIG_LRU_GEN enum { @@ -557,7 +548,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg); void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); -void lru_gen_rotate_memcg(struct lruvec *lruvec, int op); +void lru_gen_soft_reclaim(struct lruvec *lruvec); #else /* !CONFIG_MEMCG */ @@ -608,7 +599,7 @@ static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) { } -static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) +static inline void lru_gen_soft_reclaim(struct lruvec *lruvec) { } -- cgit v1.2.3 From 44b8f8bf2438bfee3aceae4d647a7460213ff340 Mon Sep 17 00:00:00 2001 From: Jiaqi Yan Date: Fri, 20 Jan 2023 03:46:20 +0000 Subject: mm: memory-failure: add memory failure stats to sysfs Patch series "Introduce per NUMA node memory error statistics", v2. Background ========== In the RFC for Kernel Support of Memory Error Detection [1], one advantage of software-based scanning over hardware patrol scrubber is the ability to make statistics visible to system administrators. The statistics include 2 categories: * Memory error statistics, for example, how many memory error are encountered, how many of them are recovered by the kernel. Note these memory errors are non-fatal to kernel: during the machine check exception (MCE) handling kernel already classified MCE's severity to be unnecessary to panic (but either action required or optional). * Scanner statistics, for example how many times the scanner have fully scanned a NUMA node, how many errors are first detected by the scanner. The memory error statistics are useful to userspace and actually not specific to scanner detected memory errors, and are the focus of this patchset. Motivation ========== Memory error stats are important to userspace but insufficient in kernel today. Datacenter administrators can better monitor a machine's memory health with the visible stats. For example, while memory errors are inevitable on servers with 10+ TB memory, starting server maintenance when there are only 1~2 recovered memory errors could be overreacting; in cloud production environment maintenance usually means live migrate all the workload running on the server and this usually causes nontrivial disruption to the customer. Providing insight into the scope of memory errors on a system helps to determine the appropriate follow-up action. In addition, the kernel's existing memory error stats need to be standardized so that userspace can reliably count on their usefulness. Today kernel provides following memory error info to userspace, but they are not sufficient or have disadvantages: * HardwareCorrupted in /proc/meminfo: number of bytes poisoned in total, not per NUMA node stats though * ras:memory_failure_event: only available after explicitly enabled * /dev/mcelog provides many useful info about the MCEs, but doesn't capture how memory_failure recovered memory MCEs * kernel logs: userspace needs to process log text Exposing memory error stats is also a good start for the in-kernel memory error detector. Today the data source of memory error stats are either direct memory error consumption, or hardware patrol scrubber detection (either signaled as UCNA or SRAO). Once in-kernel memory scanner is implemented, it will be the main source as it is usually configured to scan memory DIMMs constantly and faster than hardware patrol scrubber. How Implemented =============== As Naoya pointed out [2], exposing memory error statistics to userspace is useful independent of software or hardware scanner. Therefore we implement the memory error statistics independent of the in-kernel memory error detector. It exposes the following per NUMA node memory error counters: /sys/devices/system/node/node${X}/memory_failure/total /sys/devices/system/node/node${X}/memory_failure/recovered /sys/devices/system/node/node${X}/memory_failure/ignored /sys/devices/system/node/node${X}/memory_failure/failed /sys/devices/system/node/node${X}/memory_failure/delayed These counters describe how many raw pages are poisoned and after the attempted recoveries by the kernel, their resolutions: how many are recovered, ignored, failed, or delayed respectively. This approach can be easier to extend for future use cases than /proc/meminfo, trace event, and log. The following math holds for the statistics: * total = recovered + ignored + failed + delayed These memory error stats are reset during machine boot. The 1st commit introduces these sysfs entries. The 2nd commit populates memory error stats every time memory_failure attempts memory error recovery. The 3rd commit adds documentations for introduced stats. [1] https://lore.kernel.org/linux-mm/7E670362-C29E-4626-B546-26530D54F937@gmail.com/T/#mc22959244f5388891c523882e61163c6e4d703af [2] https://lore.kernel.org/linux-mm/7E670362-C29E-4626-B546-26530D54F937@gmail.com/T/#m52d8d7a333d8536bd7ce74253298858b1c0c0ac6 This patch (of 3): Today kernel provides following memory error info to userspace, but each has its own disadvantage * HardwareCorrupted in /proc/meminfo: number of bytes poisoned in total, not per NUMA node stats though * ras:memory_failure_event: only available after explicitly enabled * /dev/mcelog provides many useful info about the MCEs, but doesn't capture how memory_failure recovered memory MCEs * kernel logs: userspace needs to process log text Exposes per NUMA node memory error stats as sysfs entries: /sys/devices/system/node/node${X}/memory_failure/total /sys/devices/system/node/node${X}/memory_failure/recovered /sys/devices/system/node/node${X}/memory_failure/ignored /sys/devices/system/node/node${X}/memory_failure/failed /sys/devices/system/node/node${X}/memory_failure/delayed These counters describe how many raw pages are poisoned and after the attempted recoveries by the kernel, their resolutions: how many are recovered, ignored, failed, or delayed respectively. The following math holds for the statistics: * total = recovered + ignored + failed + delayed Link: https://lkml.kernel.org/r/20230120034622.2698268-1-jiaqiyan@google.com Link: https://lkml.kernel.org/r/20230120034622.2698268-2-jiaqiyan@google.com Signed-off-by: Jiaqi Yan Acked-by: David Rientjes Acked-by: Naoya Horiguchi Cc: Kefeng Wang Cc: Tony Luck Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 +++++ include/linux/mmzone.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 836b96e08a14..c9db257f09b3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3455,6 +3455,11 @@ enum mf_action_page_type { MF_MSG_UNKNOWN, }; +/* + * Sysfs entries for memory failure handling statistics. + */ +extern const struct attribute_group memory_failure_attr_group; + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr_hint, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 977be526c939..9fb1b03b83b2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1212,6 +1212,31 @@ struct deferred_split { }; #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Per NUMA node memory failure handling statistics. + */ +struct memory_failure_stats { + /* + * Number of raw pages poisoned. + * Cases not accounted: memory outside kernel control, offline page, + * arch-specific memory_failure (SGX), hwpoison_filter() filtered + * error events, and unpoison actions from hwpoison_unpoison. + */ + unsigned long total; + /* + * Recovery results of poisoned raw pages handled by memory_failure, + * in sync with mf_result. + * total = ignored + failed + delayed + recovered. + * total * PAGE_SIZE * #nodes = /proc/meminfo/HardwareCorrupted. + */ + unsigned long ignored; + unsigned long failed; + unsigned long delayed; + unsigned long recovered; +}; +#endif + /* * On NUMA machines, each NUMA node would have a pg_data_t to describe * it's memory layout. On UMA machines there is a single pglist_data which @@ -1357,6 +1382,9 @@ typedef struct pglist_data { #ifdef CONFIG_NUMA struct memory_tier __rcu *memtier; #endif +#ifdef CONFIG_MEMORY_FAILURE + struct memory_failure_stats mf_stats; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) -- cgit v1.2.3 From 3222d8c2a7f888bf38b845b125e9470b12108a4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jan 2023 14:34:36 +0100 Subject: block: remove ->rw_page The ->rw_page method is a special purpose bypass of the usual bio handling path that is limited to single-page reads and writes and synchronous which causes a lot of extra code in the drivers, callers and the block layer. The only remaining user is the MM swap code. Switch that swap code to simply submit a single-vec on-stack bio an synchronously wait on it based on a newly added QUEUE_FLAG_SYNCHRONOUS flag set by the drivers that currently implement ->rw_page instead. While this touches one extra cache line and executes extra code, it simplifies the block layer and drivers and ensures that all feastures are properly supported by all drivers, e.g. right now ->rw_page bypassed cgroup writeback entirely. [akpm@linux-foundation.org: fix comment typo, per Dan] Link: https://lkml.kernel.org/r/20230125133436.447864-8-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Cc: Dave Jiang Cc: Ira Weiny Cc: Jens Axboe Cc: Keith Busch Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Vishal Verma Signed-off-by: Andrew Morton --- include/linux/blkdev.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 43d4e073b111..c5e59965b145 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -554,6 +554,7 @@ struct request_queue { #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ +#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ @@ -1250,6 +1251,12 @@ static inline bool bdev_nonrot(struct block_device *bdev) return blk_queue_nonrot(bdev_get_queue(bdev)); } +static inline bool bdev_synchronous(struct block_device *bdev) +{ + return test_bit(QUEUE_FLAG_SYNCHRONOUS, + &bdev_get_queue(bdev)->queue_flags); +} + static inline bool bdev_stable_writes(struct block_device *bdev) { return test_bit(QUEUE_FLAG_STABLE_WRITES, @@ -1382,7 +1389,6 @@ struct block_device_operations { unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, enum req_op); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, @@ -1417,10 +1423,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int bdev_read_page(struct block_device *, sector_t, struct page *); -extern int bdev_write_page(struct block_device *, sector_t, struct page *, - struct writeback_control *); - static inline void blk_wake_io_task(struct task_struct *waiter) { /* -- cgit v1.2.3 From 00cdf76012ab78b225345e8cf77d5391b4680b45 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 Jan 2023 20:15:52 +0000 Subject: mm: add memcpy_from_file_folio() This is the equivalent of memcpy_from_page(). It differs in that it takes the position in a file instead of offset in a folio, it accepts the total number of bytes to be copied (instead of the number of bytes to be copied from this folio) and it returns how many bytes were copied from the folio, rather than making the caller calculate that and then checking if the caller got it right. [akpm@linux-foundation.org: fix typo in comment] Link: https://lkml.kernel.org/r/20230126201552.1681588-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: "Fabio M. De Francesco" Cc: Ira Weiny Signed-off-by: Andrew Morton --- include/linux/highmem.h | 29 +++++++++++++++++++++++++++++ include/linux/page-flags.h | 1 + 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index e22509420ac6..348701dae77f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -413,6 +413,35 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * memcpy_from_file_folio - Copy some bytes from a file folio. + * @to: The destination buffer. + * @folio: The folio to copy from. + * @pos: The position in the file. + * @len: The maximum number of bytes to copy. + * + * Copy up to @len bytes from this folio. This may be limited by PAGE_SIZE + * if the folio comes from HIGHMEM, and by the size of the folio. + * + * Return: The number of bytes copied from the folio. + */ +static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, + loff_t pos, size_t len) +{ + size_t offset = offset_in_folio(folio, pos); + char *from = kmap_local_folio(folio, offset); + + if (folio_test_highmem(folio)) + len = min_t(size_t, len, PAGE_SIZE - offset); + else + len = min(len, folio_size(folio) - offset); + + memcpy(to, from, len); + kunmap_local(from); + + return len; +} + /** * folio_zero_segments() - Zero two byte ranges in a folio. * @folio: The folio to write to. diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 69e93a0c1277..a7e3a3405520 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -531,6 +531,7 @@ PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) * available at this point. */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) +#define folio_test_highmem(__f) is_highmem_idx(folio_zonenum(__f)) #else PAGEFLAG_FALSE(HighMem, highmem) #endif -- cgit v1.2.3 From d585bdbeb79aa13b8a9bbe952d90f5252f7fe909 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 Jan 2023 20:12:54 +0000 Subject: fs: convert writepage_t callback to pass a folio Patch series "Convert writepage_t to use a folio". More folioisation. I split out the mpage work from everything else because it completely dominated the patch, but some implementations I just converted outright. This patch (of 2): We always write back an entire folio, but that's currently passed as the head page. Convert all filesystems that use write_cache_pages() to expect a folio instead of a page. Link: https://lkml.kernel.org/r/20230126201255.1681189-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230126201255.1681189-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3f1491b07474..46020373e155 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -366,7 +366,7 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool wb_over_bg_thresh(struct bdi_writeback *wb); -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, +typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); void tag_pages_for_writeback(struct address_space *mapping, -- cgit v1.2.3 From 6338bb05c15f88e2f4beae296ef389224837758c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 12 Dec 2022 11:46:44 +0900 Subject: error-injection: remove EI_ETYPE_NONE Patch series "error-injection: Clarify the requirements of error injectable functions". Patches for clarifying the requirement of error injectable functions and to remove the confusing EI_ETYPE_NONE. This patch (of 2): Since the EI_ETYPE_NONE is confusing type, replace it with appropriate errno. The EI_ETYPE_NONE has been introduced for a dummy (error) value, but it can mislead people that they can use ALLOW_ERROR_INJECTION(func, NONE). So remove it from the EI_ETYPE and use appropriate errno instead. [akpm@linux-foundation.org: include/linux/error-injection.h needs errno.h] Link: https://lkml.kernel.org/r/167081319306.387937.10079195394503045678.stgit@devnote3 Link: https://lkml.kernel.org/r/167081320421.387937.4259807348852421112.stgit@devnote3 Fixes: 663faf9f7bee ("error-injection: Add injectable error types") Signed-off-by: Masami Hiramatsu (Google) Cc: Alexei Starovoitov Cc: Borislav Petkov (AMD) Cc: Chris Mason Cc: Christoph Hellwig Cc: Florent Revest Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Kees Cook Cc: KP Singh Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/error-injection.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h index 635a95caf29f..20e738f4eae8 100644 --- a/include/linux/error-injection.h +++ b/include/linux/error-injection.h @@ -3,6 +3,7 @@ #define _LINUX_ERROR_INJECTION_H #include +#include #include #ifdef CONFIG_FUNCTION_ERROR_INJECTION @@ -19,7 +20,7 @@ static inline bool within_error_injection_list(unsigned long addr) static inline int get_injectable_error_type(unsigned long addr) { - return EI_ETYPE_NONE; + return -EOPNOTSUPP; } #endif -- cgit v1.2.3 From 88ad32a799ddc92eafd2ae204cb43f04ac20a05c Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 16 Dec 2022 16:04:40 +0100 Subject: include/linux/percpu_counter.h: race in uniprocessor percpu_counter_add() The percpu interface is supposed to be preempt and irq safe. But: The uniprocessor implementation of percpu_counter_add() is not irq safe: if an interrupt happens during the +=, then the result is undefined. Therefore: switch from preempt_disable() to local_irq_save(). This prevents interrupts from interrupting the +=, and as a side effect prevents preemption. Link: https://lkml.kernel.org/r/20221216150441.200533-2-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: "Sun, Jiebin" Cc: <1vier1@web.de> Cc: Alexander Sverdlin Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/percpu_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a3aae8d57a42..521a733e21a9 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -152,9 +152,11 @@ __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); fbc->count += amount; - preempt_enable(); + local_irq_restore(flags); } /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ -- cgit v1.2.3 From 9456d539acde9f92a52ffe477b4b86e35d214d1a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 14:19:37 +0200 Subject: util_macros.h: add missing inclusion The header is the direct user of definitions from the math.h, include it. Link: https://lkml.kernel.org/r/20230103121937.32085-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 72299f261b25..b641ec00be3e 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -2,6 +2,8 @@ #ifndef _LINUX_HELPER_MACROS_H_ #define _LINUX_HELPER_MACROS_H_ +#include + #define __find_closest(x, a, as, op) \ ({ \ typeof(as) __fc_i, __fc_as = (as) - 1; \ -- cgit v1.2.3 From 7e99f8b69c11c104933b9bc8fda226ebfb8aaaa5 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:47 +0100 Subject: kexec: factor out kexec_load_permitted Both syscalls (kexec and kexec_file) do the same check, let's factor it out. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-2-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5dd4343c1bbe..f18a3c9e813b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -403,7 +403,8 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -extern int kexec_load_disabled; + +bool kexec_load_permitted(void); #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) -- cgit v1.2.3 From a42aaad2e47b23d63037bfc0130e33fc0f74cd71 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:48 +0100 Subject: kexec: introduce sysctl parameters kexec_load_limit_* kexec allows replacing the current kernel with a different one. This is usually a source of concerns for sysadmins that want to harden a system. Linux already provides a way to disable loading new kexec kernel via kexec_load_disabled, but that control is very coard, it is all or nothing and does not make distinction between a panic kexec and a normal kexec. This patch introduces new sysctl parameters, with finer tuning to specify how many times a kexec kernel can be loaded. The sysadmin can set different limits for kexec panic and kexec reboot kernels. The value can be modified at runtime via sysctl, but only with a stricter value. With these new parameters on place, a system with loadpin and verity enabled, using the following kernel parameters: sysctl.kexec_load_limit_reboot=0 sysct.kexec_load_limit_panic=1 can have a good warranty that if initrd tries to load a panic kernel, a malitious user will have small chances to replace that kernel with a different one, even if they can trigger timeouts on the disk where the panic kernel lives. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-3-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli # Steam Deck Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f18a3c9e813b..6883c5922701 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -404,7 +404,7 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -bool kexec_load_permitted(void); +bool kexec_load_permitted(int kexec_image_type); #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) -- cgit v1.2.3 From d82e6ae67ac2f9d6ba51690353c477b340bba6b5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:29 +0800 Subject: iommu/vt-d: Remove include/linux/intel-svm.h There's no need to have a public header for Intel SVA implementation. The device driver should interact with Intel SVA implementation via the IOMMU generic APIs. Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230109014955.147068-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/intel-svm.h (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h deleted file mode 100644 index f9a0d44f6fdb..000000000000 --- a/include/linux/intel-svm.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2015 Intel Corporation. - * - * Authors: David Woodhouse - */ - -#ifndef __INTEL_SVM_H__ -#define __INTEL_SVM_H__ - -/* Page Request Queue depth */ -#define PRQ_ORDER 4 -#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) - -#endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 4db96bfe9d7772d6ddedd62ce478895999043fd7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:34 +0800 Subject: iommu/vt-d: Support size of the register set in DRHD A new field, which indicates the size of the remapping hardware register set for this remapping unit, is introduced in the DMA-remapping hardware unit definition (DRHD) structure with the VT-d Spec 4.0. With this information, SW doesn't need to 'guess' the size of the register set anymore. Update the struct acpi_dmar_hardware_unit to reflect the field. Store the size of the register set in struct dmar_drhd_unit for each dmar device. The 'size' information is ResvZ for the old BIOS and platforms. Fall back to the old guessing method. There is nothing changed. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-2-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d81a51978d01..725d5e6acec0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -39,6 +39,7 @@ struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ + unsigned long reg_size; /* size of register set */ struct dmar_dev_scope *devices;/* target device array */ int devices_cnt; /* target device count */ u16 segment; /* PCI domain */ -- cgit v1.2.3 From 46284c6ceb5e4dfddcb00dafb7c2f3c1437fdca4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:38 +0800 Subject: iommu/vt-d: Support cpumask for IOMMU perfmon The perf subsystem assumes that all counters are by default per-CPU. So the user space tool reads a counter from each CPU. However, the IOMMU counters are system-wide and can be read from any CPU. Here we use a CPU mask to restrict counting to one CPU to handle the issue. (with CPU hotplug notifier to choose a different CPU if the chosen one is taken off-line). The CPU is exposed to /sys/bus/event_source/devices/dmar*/cpumask for the user space perf tool. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-6-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..f2ea348ce3b0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -221,6 +221,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, + CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 0c05e7bd2d017a3a9a0f4e9a19ad4acf1f616f12 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 25 Jan 2023 10:54:01 -0800 Subject: livepatch,x86: Clear relocation targets on a module removal Josh reported a bug: When the object to be patched is a module, and that module is rmmod'ed and reloaded, it fails to load with: module: x86/modules: Skipping invalid relocation target, existing value is nonzero for type 2, loc 00000000ba0302e9, val ffffffffa03e293c livepatch: failed to initialize patch 'livepatch_nfsd' for module 'nfsd' (-8) livepatch: patch 'livepatch_nfsd' failed for module 'nfsd', refusing to load module 'nfsd' The livepatch module has a relocation which references a symbol in the _previous_ loading of nfsd. When apply_relocate_add() tries to replace the old relocation with a new one, it sees that the previous one is nonzero and it errors out. He also proposed three different solutions. We could remove the error check in apply_relocate_add() introduced by commit eda9cec4c9a1 ("x86/module: Detect and skip invalid relocations"). However the check is useful for detecting corrupted modules. We could also deny the patched modules to be removed. If it proved to be a major drawback for users, we could still implement a different approach. The solution would also complicate the existing code a lot. We thus decided to reverse the relocation patching (clear all relocation targets on x86_64). The solution is not universal and is too much arch-specific, but it may prove to be simpler in the end. Reported-by: Josh Poimboeuf Originally-by: Miroslav Benes Signed-off-by: Song Liu Acked-by: Miroslav Benes Reviewed-by: Petr Mladek Acked-by: Josh Poimboeuf Reviewed-by: Joe Lawrence Tested-by: Joe Lawrence Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230125185401.279042-2-song@kernel.org --- include/linux/moduleloader.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 9e09d11ffe5b..2ba4bc834a4f 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -72,6 +72,23 @@ int apply_relocate_add(Elf_Shdr *sechdrs, unsigned int symindex, unsigned int relsec, struct module *mod); +#ifdef CONFIG_LIVEPATCH +/* + * Some architectures (namely x86_64 and ppc64) perform sanity checks when + * applying relocations. If a patched module gets unloaded and then later + * reloaded (and re-patched), klp re-applies relocations to the replacement + * function(s). Any leftover relocations from the previous loading of the + * patched module might trigger the sanity checks. + * + * To prevent that, when unloading a patched module, clear out any relocations + * that might trigger arch-specific sanity checks on a future module reload. + */ +void clear_relocate_add(Elf_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me); +#endif #else static inline int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, -- cgit v1.2.3 From f05837ed73d0c73e950b2d9f2612febb0d3d451e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:03:48 +0100 Subject: blk-cgroup: store a gendisk to throttle in struct task_struct Switch from a request_queue pointer and reference to a gendisk once for the throttle information in struct task_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Link: https://lore.kernel.org/r/20230203150400.3199230-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 853d08f7562b..6f6ce9ca7097 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1436,7 +1436,7 @@ struct task_struct { #endif #ifdef CONFIG_BLK_CGROUP - struct request_queue *throttle_queue; + struct gendisk *throttle_disk; #endif #ifdef CONFIG_UPROBES -- cgit v1.2.3 From 3f13ab7c80fdb0ada86a8e3e818960bc1ccbaa59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:04:00 +0100 Subject: blk-cgroup: move the cgroup information to struct gendisk cgroup information only makes sense on a live gendisk that allows file system I/O (which includes the raw block device). So move over the cgroup related members. Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230203150400.3199230-20-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9637d63e6f0..79aec4ebadb9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -163,6 +163,12 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; +#ifdef CONFIG_BLK_CGROUP + DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); + struct blkcg_gq *root_blkg; + struct list_head blkg_list; + struct mutex blkcg_mutex; +#endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -481,12 +487,6 @@ struct request_queue { struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; -#ifdef CONFIG_BLK_CGROUP - DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); - struct blkcg_gq *root_blkg; - struct list_head blkg_list; - struct mutex blkcg_mutex; -#endif struct queue_limits limits; -- cgit v1.2.3 From d58cdfae6a22e5079656c487aad669597a0635c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:06:12 +0100 Subject: block: factor out a bvec_set_page helper Add a helper to initialize a bvec based of a page pointer. This will help removing various open code bvec initializations. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20230203150634.3199647-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 7939b345ee7f..a7b7c016b9a3 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -34,6 +34,21 @@ struct bio_vec { unsigned int bv_offset; }; +/** + * bvec_set_page - initialize a bvec based off a struct page + * @bv: bvec to initialize + * @page: page the bvec should point to + * @len: length of the bvec + * @offset: offset into the page + */ +static inline void bvec_set_page(struct bio_vec *bv, struct page *page, + unsigned int len, unsigned int offset) +{ + bv->bv_page = page; + bv->bv_len = len; + bv->bv_offset = offset; +} + struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ -- cgit v1.2.3 From 26db5ee158510108c819aa7be6eb8c75accf85d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:06:13 +0100 Subject: block: add a bvec_set_folio helper A smaller wrapper around bvec_set_page that takes a folio instead. There are only two potential users for this in the tree, but the number will grow in the future. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20230203150634.3199647-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index a7b7c016b9a3..b9e64b91e92b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -49,6 +49,19 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, bv->bv_offset = offset; } +/** + * bvec_set_folio - initialize a bvec based off a struct folio + * @bv: bvec to initialize + * @folio: folio the bvec should point to + * @len: length of the bvec + * @offset: offset into the folio + */ +static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, + unsigned int len, unsigned int offset) +{ + bvec_set_page(bv, &folio->page, len, offset); +} + struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ -- cgit v1.2.3 From 666e6550cb74e3a7206b5699409c9f31e123887e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:06:14 +0100 Subject: block: add a bvec_set_virt helper A small wrapper around bvec_set_page for callers that have a virtual address. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20230203150634.3199647-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index b9e64b91e92b..555aae5448ae 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -62,6 +62,18 @@ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, bvec_set_page(bv, &folio->page, len, offset); } +/** + * bvec_set_virt - initialize a bvec based on a virtual address + * @bv: bvec to initialize + * @vaddr: virtual address to set the bvec to + * @len: length of the bvec + */ +static inline void bvec_set_virt(struct bio_vec *bv, void *vaddr, + unsigned int len) +{ + bvec_set_page(bv, virt_to_page(vaddr), len, offset_in_page(vaddr)); +} + struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ -- cgit v1.2.3 From 234fa51db95f3236a049557db735606908747f38 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2023 17:39:38 +0100 Subject: efi: Drop minimum EFI version check at boot We currently pass a minimum major version to the generic EFI helper that checks the system table magic and version, and refuse to boot if the value is lower. The motivation for this check is unknown, and even the code that uses major version 2 as the minimum (ARM, arm64 and RISC-V) should make it past this check without problems, and boot to a point where we have access to a console or some other means to inform the user that the firmware's major revision number made us unhappy. (Revision 2.0 of the UEFI specification was released in January 2006, whereas ARM, arm64 and RISC-V support where added in 2009, 2013 and 2017, respectively, so checking for major version 2 or higher is completely arbitrary) So just drop the check. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 507390dda8b9..9d455d502ac9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -721,8 +721,7 @@ static inline void efi_esrt_init(void) { } extern int efi_config_parse_tables(const efi_config_table_t *config_tables, int count, const efi_config_table_type_t *arch_tables); -extern int efi_systab_check_header(const efi_table_hdr_t *systab_hdr, - int min_major_version); +extern int efi_systab_check_header(const efi_table_hdr_t *systab_hdr); extern void efi_systab_report_header(const efi_table_hdr_t *systab_hdr, unsigned long fw_vendor); extern u64 efi_get_iobase (void); -- cgit v1.2.3 From c9a397cee9f5c93a7f48e18038b14057044db6ba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 18 Jan 2023 13:50:28 -0400 Subject: vfio: Support VFIO_NOIOMMU with iommufd Add a small amount of emulation to vfio_compat to accept the SET_IOMMU to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working on a no-iommu enabled device. Move the enable_unsafe_noiommu_mode module out of container.c into vfio_main.c so that it is always available even if VFIO_CONTAINER=n. This passes Alex's mini-test: https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c Link: https://lore.kernel.org/r/0-v3-480cd64a16f7+1ad0-iommufd_noiommu_jgg@nvidia.com Reviewed-by: Kevin Tian Acked-by: Alex Williamson Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 650d45629647..c0b5b3ac34f1 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -57,7 +57,9 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx); +int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -89,8 +91,12 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long return -EOPNOTSUPP; } -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, - u32 *out_ioas_id) +static inline int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 36c5014e5460963ad7766487c0e22a7ff28681fc Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 31 Jan 2023 17:00:08 +0800 Subject: cpufreq: amd-pstate: optimize driver working mode selection in amd_pstate_param() The amd-pstate driver may support multiple working modes. Introduce a variable to keep track of which mode is currently enabled. Here we use cppc_state var to indicate which mode is enabled. This change will help to simplify the the amd_pstate_param() to choose which mode used for the following driver registration. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- include/linux/amd-pstate.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 1c4b8659f171..dae2ce0f6735 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -74,4 +74,21 @@ struct amd_cpudata { bool boost_supported; }; +/* + * enum amd_pstate_mode - driver working mode of amd pstate + */ +enum amd_pstate_mode { + AMD_PSTATE_DISABLE = 0, + AMD_PSTATE_PASSIVE, + AMD_PSTATE_ACTIVE, + AMD_PSTATE_MAX, +}; + +static const char * const amd_pstate_mode_string[] = { + [AMD_PSTATE_DISABLE] = "disable", + [AMD_PSTATE_PASSIVE] = "passive", + [AMD_PSTATE_ACTIVE] = "active", + NULL, +}; + #endif /* _LINUX_AMD_PSTATE_H */ -- cgit v1.2.3 From ffa5096a7c338641f70fb06d4778e8cf400181a8 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:09 +0800 Subject: cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors Add EPP driver support for AMD SoCs which support a dedicated MSR for CPPC. EPP is used by the DPM controller to configure the frequency that a core operates at during short periods of activity. The SoC EPP targets are configured on a scale from 0 to 255 where 0 represents maximum performance and 255 represents maximum efficiency. The amd-pstate driver exports profile string names to userspace that are tied to specific EPP values. The balance_performance string (0x80) provides the best balance for efficiency versus power on most systems, but users can choose other strings to meet their needs as well. $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences default performance balance_performance balance_power power $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference balance_performance To enable the driver,it needs to add `amd_pstate=active` to kernel command line and kernel will load the active mode epp driver Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- include/linux/amd-pstate.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index dae2ce0f6735..72ea7cf85ca3 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -12,6 +12,11 @@ #include +#define AMD_CPPC_EPP_PERFORMANCE 0x00 +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF +#define AMD_CPPC_EPP_POWERSAVE 0xFF + /********************************************************************* * AMD P-state INTERFACE * *********************************************************************/ @@ -47,6 +52,10 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @epp_policy: Last saved policy used to set energy-performance preference + * @epp_cached: Cached CPPC energy-performance preference value + * @policy: Cpufreq policy value + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value * * The amd_cpudata is key private data for each CPU thread in AMD P-State, and * represents all the attributes and goals that AMD P-State requests at runtime. @@ -72,6 +81,12 @@ struct amd_cpudata { u64 freq; bool boost_supported; + + /* EPP feature related attributes*/ + s16 epp_policy; + s16 epp_cached; + u32 policy; + u64 cppc_cap1_cached; }; /* @@ -90,5 +105,4 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_ACTIVE] = "active", NULL, }; - #endif /* _LINUX_AMD_PSTATE_H */ -- cgit v1.2.3 From d4da12f8033a123353eccf993cb95ee5bff21e7c Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 31 Jan 2023 17:00:10 +0800 Subject: cpufreq: amd-pstate: implement amd pstate cpu online and offline callback Adds online and offline driver callback support to allow cpu cores go offline and help to restore the previous working states when core goes back online later for EPP driver mode. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- include/linux/amd-pstate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 72ea7cf85ca3..f5f22418e64b 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -87,6 +87,7 @@ struct amd_cpudata { s16 epp_cached; u32 policy; u64 cppc_cap1_cached; + bool suspended; }; /* -- cgit v1.2.3 From b0048092f7d3921d56f2c5bfa32062fac5e7500b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 3 Feb 2023 11:26:49 -0800 Subject: efi/cper, cxl: Remove cxl_err.h While going to create include/linux/cxl.h for some cross-subsystem CXL definitions I noticed that include/linux/cxl_err.h was already present. That header has no reason to be global, and it duplicates the RAS Capability Structure definitions in drivers/cxl/cxl.h. A follow-on patch can consider unifying the CXL native error tracing with the CPER error printing. Also fixed up the spec reference as the latest released spec is v3.0. Cc: Smita Koralahalli Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Signed-off-by: Ard Biesheuvel --- include/linux/cxl_err.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/cxl_err.h (limited to 'include/linux') diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h deleted file mode 100644 index 629e1bdeda44..000000000000 --- a/include/linux/cxl_err.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2022 Advanced Micro Devices, Inc. - * - * Author: Smita Koralahalli - */ - -#ifndef LINUX_CXL_ERR_H -#define LINUX_CXL_ERR_H - -/* CXL RAS Capability Structure, CXL v3.1 sec 8.2.4.16 */ -struct cxl_ras_capability_regs { - u32 uncor_status; - u32 uncor_mask; - u32 uncor_severity; - u32 cor_status; - u32 cor_mask; - u32 cap_control; - u32 header_log[16]; -}; - -#endif //__CXL_ERR_ -- cgit v1.2.3 From 3770e52fd4ec40ebee16ba19ad6c09dc0b52739b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 Jan 2023 14:07:26 +0100 Subject: mm: extend max struct page size for kmsan After x86 enabled support for KMSAN, it has become possible to have larger 'struct page' than was expected when commit 5470dea49f53 ("mm: use mm_zero_struct_page from SPARC on all 64b architectures") was merged: include/linux/mm.h:156:10: warning: no case matching constant switch condition '96' switch (sizeof(struct page)) { Extend the maximum accordingly. Link: https://lkml.kernel.org/r/20230130130739.563628-1-arnd@kernel.org Fixes: 5470dea49f53 ("mm: use mm_zero_struct_page from SPARC on all 64b architectures") Fixes: 4ca8cc8d1bbe ("x86: kmsan: enable KMSAN builds for x86") Fixes: f80be4571b19 ("kmsan: add KMSAN runtime core") Signed-off-by: Arnd Bergmann Acked-by: Michal Hocko Reviewed-by: Pasha Tatashin Cc: Alexander Duyck Cc: Alexander Potapenko Cc: Alex Sierra Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f857163ac89..f13f20258ce9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -137,7 +137,7 @@ extern int mmap_rnd_compat_bits __read_mostly; * define their own version of this macro in */ #if BITS_PER_LONG == 64 -/* This function must be updated when the size of struct page grows above 80 +/* This function must be updated when the size of struct page grows above 96 * or reduces below 56. The idea that compiler optimizes out switch() * statement, and only leaves move/store instructions. Also the compiler can * combine write statements if they are both assignments and can be reordered, @@ -148,12 +148,18 @@ static inline void __mm_zero_struct_page(struct page *page) { unsigned long *_pp = (void *)page; - /* Check that struct page is either 56, 64, 72, or 80 bytes */ + /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */ BUILD_BUG_ON(sizeof(struct page) & 7); BUILD_BUG_ON(sizeof(struct page) < 56); - BUILD_BUG_ON(sizeof(struct page) > 80); + BUILD_BUG_ON(sizeof(struct page) > 96); switch (sizeof(struct page)) { + case 96: + _pp[11] = 0; + fallthrough; + case 88: + _pp[10] = 0; + fallthrough; case 80: _pp[9] = 0; fallthrough; -- cgit v1.2.3 From cf1d2ffcc6f17b422239f6ab34b078945d07f9aa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Feb 2023 09:48:12 +0100 Subject: efi: Discover BTI support in runtime services regions Add the generic plumbing to detect whether or not the runtime code regions were constructed with BTI/IBT landing pads by the firmware, permitting the OS to enable enforcement when mapping these regions into the OS's address space. Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook --- include/linux/efi.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 9d455d502ac9..df88786b5947 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -584,11 +584,15 @@ typedef struct { #define EFI_INVALID_TABLE_ADDR (~0UL) +// BIT0 implies that Runtime code includes the forward control flow guard +// instruction, such as X86 CET-IBT or ARM BTI. +#define EFI_MEMORY_ATTRIBUTES_FLAGS_RT_FORWARD_CONTROL_FLOW_GUARD 0x1 + typedef struct { u32 version; u32 num_entries; u32 desc_size; - u32 reserved; + u32 flags; efi_memory_desc_t entry[0]; } efi_memory_attributes_table_t; @@ -751,7 +755,7 @@ extern unsigned long efi_mem_attr_table; * argument in the page tables referred to by the * first argument. */ -typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *); +typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *, bool); extern int efi_memattr_init(void); extern int efi_memattr_apply_permissions(struct mm_struct *mm, -- cgit v1.2.3 From b63636b6c170aaf0becd67d14ba7c538d1f8d4f5 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 21 Nov 2022 17:16:38 -0800 Subject: net/mlx5: Add firmware support for MTUTC scaled_ppm frequency adjustments When device is capable of handling scaled ppm values for adjusting frequency, conversion to ppb will not be done by the driver. Instead, the scaled ppm value will be passed directly to the device for the frequency adjustment operation. Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1b6201bb04c1..7cf6a78fea07 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9925,6 +9925,11 @@ struct mlx5_ifc_mpegc_reg_bits { u8 reserved_at_60[0x100]; }; +enum { + MLX5_MTUTC_FREQ_ADJ_UNITS_PPB = 0x0, + MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM = 0x1, +}; + enum { MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE = 0x1, MLX5_MTUTC_OPERATION_ADJUST_TIME = 0x2, @@ -9932,7 +9937,9 @@ enum { }; struct mlx5_ifc_mtutc_reg_bits { - u8 reserved_at_0[0x1c]; + u8 reserved_at_0[0x5]; + u8 freq_adj_units[0x3]; + u8 reserved_at_8[0x14]; u8 operation[0x4]; u8 freq_adjustment[0x20]; @@ -10005,7 +10012,8 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x51]; + u8 reserved_at_0[0x50]; + u8 mtutc_freq_adj_units[0x1]; u8 mtutc_time_adjustment_extended_range[0x1]; u8 reserved_at_52[0xb]; u8 mcia_32dwords[0x1]; -- cgit v1.2.3 From 8d9ef69487e114f80e20ffbec14ca8684953fef0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Jan 2023 16:38:21 +0100 Subject: mfd: intel_soc_pmic_chtwc: Add Lenovo Yoga Tab 3 X90F to intel_cht_wc_models The drivers for various CHT Whiskey Cove PMIC child-devices need to know the model, since they have model specific behavior. The DMI match table for this is shared between the child-device-drivers inside the MFD driver. Add the Lenovo Yoga Tab 3 X90F, which is a previously unknown tablet model with a CHT Whiskey Cove PMIC, to the intel_cht_wc_models enum and to the DMI match table. Signed-off-by: Hans de Goede Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230126153823.22146-2-hdegoede@redhat.com --- include/linux/mfd/intel_soc_pmic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index 945bde1fe55c..9ba2c1a8d836 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -18,6 +18,7 @@ enum intel_cht_wc_models { INTEL_CHT_WC_GPD_WIN_POCKET, INTEL_CHT_WC_XIAOMI_MIPAD2, INTEL_CHT_WC_LENOVO_YOGABOOK1, + INTEL_CHT_WC_LENOVO_YT3_X90, }; /** -- cgit v1.2.3 From 58ef4ece1e41ac525db3e79529909683325d85df Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 31 Jan 2023 20:18:51 -0800 Subject: soc: qcom: pmic_glink: Introduce base PMIC GLINK driver The PMIC GLINK service runs on one of the co-processors of some modern Qualcomm platforms and implements USB-C and battery managements. It uses a message based protocol over GLINK for communication with the OS, hence the name. The driver implemented provides the rpmsg device for communication and uses auxiliary bus to spawn off individual devices in respective subsystem. The auxiliary devices are spawned off from a platform_device, so that the drm_bridge is available early, to allow the DisplayPort driver to probe even before the remoteproc has spun up. Signed-off-by: Bjorn Andersson Tested-by: Konrad Dybcio # SM8350 PDX215 Reviewed-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8550-MTP & SM8450-HDK Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230201041853.1934355-3-quic_bjorande@quicinc.com --- include/linux/soc/qcom/pmic_glink.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/soc/qcom/pmic_glink.h (limited to 'include/linux') diff --git a/include/linux/soc/qcom/pmic_glink.h b/include/linux/soc/qcom/pmic_glink.h new file mode 100644 index 000000000000..fd124aa18c81 --- /dev/null +++ b/include/linux/soc/qcom/pmic_glink.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, Linaro Ltd + */ +#ifndef __SOC_QCOM_PMIC_GLINK_H__ +#define __SOC_QCOM_PMIC_GLINK_H__ + +struct pmic_glink; +struct pmic_glink_client; + +#define PMIC_GLINK_OWNER_BATTMGR 32778 +#define PMIC_GLINK_OWNER_USBC 32779 +#define PMIC_GLINK_OWNER_USBC_PAN 32780 + +#define PMIC_GLINK_REQ_RESP 1 +#define PMIC_GLINK_NOTIFY 2 + +struct pmic_glink_hdr { + __le32 owner; + __le32 type; + __le32 opcode; +}; + +int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len); + +struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, + unsigned int id, + void (*cb)(const void *, size_t, void *), + void (*pdr)(void *, int), + void *priv); + +#endif -- cgit v1.2.3 From 835a486cd9f55790dee9f6b67ce0057d49f15da5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 3 Jan 2023 19:42:15 +0530 Subject: genirq: Add mechanism to multiplex a single HW IPI All RISC-V platforms have a single HW IPI provided by the INTC local interrupt controller. The HW method to trigger INTC IPI can be through external irqchip (e.g. RISC-V AIA), through platform specific device (e.g. SiFive CLINT timer), or through firmware (e.g. SBI IPI call). To support multiple IPIs on RISC-V, add a generic IPI multiplexing mechanism which help us create multiple virtual IPIs using a single HW IPI. This generic IPI multiplexing is inspired by the Apple AIC irqchip driver and it is shared by various RISC-V irqchip drivers. Signed-off-by: Anup Patel Reviewed-by: Hector Martin Tested-by: Hector Martin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230103141221.772261-4-apatel@ventanamicro.com --- include/linux/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c3eb89606c2b..b1b28affb32a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1266,6 +1266,9 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); int ipi_send_single(unsigned int virq, unsigned int cpu); int ipi_send_mask(unsigned int virq, const struct cpumask *dest); +void ipi_mux_process(void); +int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu)); + #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER /* * Registers a generic IRQ handling function as the top-level IRQ handler in -- cgit v1.2.3 From fb6211f1584aad12c267c3333273f42f69438ced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3=20=C3=81gila=20Bitsch?= Date: Sat, 4 Feb 2023 13:15:01 +0100 Subject: usb: gadget: add doc to struct usb_composite_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added documentation to new struct members for WebUSB: * bcd_webusb_version * b_webusb_vendor_code * landing_page * use_webusb to avoid warnings in the build of htmldocs Fixes: 93c473948c58 ("usb: gadget: add WebUSB landing page support") Reported-by: Stephen Rothwell Signed-off-by: Jó Ágila Bitsch Link: https://lore.kernel.org/r/Y95MRZZz3yC5lETB@jo-einhundert Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 91d22c3ed458..7ef8cea67f50 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -432,6 +432,10 @@ static inline struct usb_composite_driver *to_cdriver( * @qw_sign: qwSignature part of the OS string * @b_vendor_code: bMS_VendorCode part of the OS string * @use_os_string: false by default, interested gadgets set it + * @bcd_webusb_version: 0x0100 by default, WebUSB specification version + * @b_webusb_vendor_code: 0x0 by default, vendor code for WebUSB + * @landing_page: empty by default, landing page to announce in WebUSB + * @use_webusb:: false by default, interested gadgets set it * @os_desc_config: the configuration to be used with OS descriptors * @setup_pending: true when setup request is queued but not completed * @os_desc_pending: true when os_desc request is queued but not completed -- cgit v1.2.3 From 9dde0cd3b10f63bc4100ebadc7e32275baabfa68 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 3 Feb 2023 13:59:29 +0100 Subject: net: introduce skb_poison_list and use in kfree_skb_list First user of skb_poison_list is in kfree_skb_list_reason, to catch bugs earlier like introduced in commit eedade12f4cb ("net: kfree_skb_list use kmem_cache_free_bulk"). For completeness mentioned bug have been fixed in commit f72ff8b81ebc ("net: fix kfree_skb_list use of skb_mark_not_on_list"). In case of a bug like mentioned commit we would have seen OOPS with: general protection fault, probably for non-canonical address 0xdead000000000870 And content of one the registers e.g. R13: dead000000000800 In this case skb->len is at offset 112 bytes (0x70) why fault happens at 0x800+0x70 = 0x870 Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/poison.h | 3 +++ include/linux/skbuff.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 2d3249eb0e62..2823f90fdab4 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -81,6 +81,9 @@ /********** net/core/page_pool.c **********/ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) +/********** net/core/skbuff.c **********/ +#define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA)) + /********** kernel/bpf/ **********/ #define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5ba12185f43e..1fa95b916342 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1738,6 +1738,13 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) skb->next = NULL; } +static inline void skb_poison_list(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + skb->next = SKB_LIST_POISON_NEXT; +#endif +} + /* Iterate through singly-linked GSO fragments of an skb. */ #define skb_list_walk_safe(first, skb, next_skb) \ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ -- cgit v1.2.3 From 762ed313574652ac604fb95dd601232a6e0320ef Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 17:07:36 -0800 Subject: platform/x86/intel/tpmi: Process CPU package mapping There is one Intel Out-of-Band (OOB) PCI device per CPU package. Since TPMI feature is exposed via OOB PCI device, there will be multiple TPMI device instances on a multi CPU package system. There are several PM features, which needs to associate APIC based CPU package ID information to a TPMI instance. For example if Intel Speed Select feature requires control of a CPU package, it needs to identify right TPMI device instance. There is one special TPMI ID (ID = 0x81) in the PFS. The MMIO region of this TPMI ID points to a mapping table: - PCI Bus ID - PCI Device ID - APIC based Package ID This mapping information can be used by any PM feature driver which requires mapping from a CPU package to a TPMI device instance. Unlike other TPMI features, device node is not created for this feature ID (0x81). Instead store the mapping information as platform data, which is part of the per PCI device TPMI instance (struct intel_tpmi_info). Later the TPMI feature drivers can get the mapping information using an interface "tpmi_get_platform_data()" Signed-off-by: Srinivas Pandruvada Reviewed-by: Pierre-Louis Bossart Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230202010738.2186174-6-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/intel_tpmi.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/intel_tpmi.h (limited to 'include/linux') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h new file mode 100644 index 000000000000..5b665320ecb4 --- /dev/null +++ b/include/linux/intel_tpmi.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * intel_tpmi.h: Intel TPMI core external interface + */ + +#ifndef _INTEL_TPMI_H_ +#define _INTEL_TPMI_H_ + +/** + * struct intel_tpmi_plat_info - Platform information for a TPMI device instance + * @package_id: CPU Package id + * @bus_number: PCI bus number + * @device_number: PCI device number + * @function_number: PCI function number + * + * Structure to store platform data for a TPMI device instance. This + * struct is used to return data via tpmi_get_platform_data(). + */ +struct intel_tpmi_plat_info { + u8 package_id; + u8 bus_number; + u8 device_number; + u8 function_number; +}; + +struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); + +#endif -- cgit v1.2.3 From 6d957f1e1646039f51fe1f6c6060738f648c4c70 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 17:07:37 -0800 Subject: platform/x86/intel/tpmi: ADD tpmi external interface for tpmi feature drivers Add interface to get resources and platform data. This will avoid code duplication. These interfaces includes: - Get resource count - Get resource at an index Signed-off-by: Srinivas Pandruvada Reviewed-by: Pierre-Louis Bossart Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230202010738.2186174-7-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/intel_tpmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 5b665320ecb4..f505788c05da 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -24,5 +24,7 @@ struct intel_tpmi_plat_info { }; struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); +struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); +int tpmi_get_resource_count(struct auxiliary_device *auxdev); #endif -- cgit v1.2.3 From 286c0e09e8e07de0f116a01aa234b05d9956dcf5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 31 Jan 2023 15:42:59 +0100 Subject: can: bittiming: can_changelink() pass extack down callstack This is a preparation patch. In order to pass warning/error messages during netlink calls back to user space, pass the extack struct down the callstack of can_changelink(), the actual error messages will be added in the following ptaches. Link: https://lore.kernel.org/all/20230202110854.2318594-10-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index ef0a77173e3c..53d693ae5397 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -116,7 +116,7 @@ struct can_tdc_const { #ifdef CONFIG_CAN_CALC_BITTIMING int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc); + const struct can_bittiming_const *btc, struct netlink_ext_ack *extack); void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, const struct can_bittiming *dbt, @@ -141,7 +141,8 @@ can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, const u32 *bitrate_const, - const unsigned int bitrate_const_cnt); + const unsigned int bitrate_const_cnt, + struct netlink_ext_ack *extack); /* * can_bit_time() - Duration of one bit -- cgit v1.2.3 From 5988bf737deed86d6186a21e73e2fc253a4ff466 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 27 Sep 2022 17:07:07 +0200 Subject: can: bittiming: factor out can_sjw_set_default() and can_sjw_check() Factor out the functionality of assigning a SJW default value into can_sjw_set_default() and the checking the SJW limits into can_sjw_check(). This functions will be improved and called from a different function in the following patches. Link: https://lore.kernel.org/all/20230202110854.2318594-11-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 53d693ae5397..6cb2ae308e3f 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -138,6 +138,11 @@ can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, } #endif /* CONFIG_CAN_CALC_BITTIMING */ +void can_sjw_set_default(struct can_bittiming *bt); + +int can_sjw_check(const struct net_device *dev, const struct can_bittiming *bt, + const struct can_bittiming_const *btc, struct netlink_ext_ack *extack); + int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, const u32 *bitrate_const, -- cgit v1.2.3 From 3f16ba1c0768de6cdc6f65105757ef04dbfd8e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 23 Dec 2022 21:30:19 +0000 Subject: HID: use standard debug APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The custom "debug" module parameter is fairly inflexible. It can only manage debugging for all calls dbg_hid() at the same time. Furthermore it creates a mismatch between calls to hid_dbg() which can be managed by CONFIG_DYNAMIC_DEBUG and dbg_hid() which is managed by the module parameter. Furthermore the change to pr_debug() allows the debugging statements to be completely compiled-out if desired. Signed-off-by: Thomas Weißschuh Tested-by: Bastien Nocera Link: https://lore.kernel.org/r/20221223-hid-dbg-v1-1-5dcf8794f7f9@weissschuh.net Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 3922b66c6da8..6074d2a828fd 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -870,8 +870,6 @@ extern bool hid_is_usb(const struct hid_device *hdev); /* HID core API */ -extern int hid_debug; - extern bool hid_ignore(struct hid_device *); extern int hid_add_device(struct hid_device *); extern void hid_destroy_device(struct hid_device *); @@ -1179,11 +1177,7 @@ int hid_pidff_init(struct hid_device *hid); #define hid_pidff_init NULL #endif -#define dbg_hid(fmt, ...) \ -do { \ - if (hid_debug) \ - printk(KERN_DEBUG "%s: " fmt, __FILE__, ##__VA_ARGS__); \ -} while (0) +#define dbg_hid(fmt, ...) pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__) #define hid_err(hid, fmt, ...) \ dev_err(&(hid)->dev, fmt, ##__VA_ARGS__) -- cgit v1.2.3 From 2edcedcd1dcb542cb09acabda1732de47dd82e68 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 9 Jun 2022 17:36:33 +0200 Subject: locking/lockdep: Remove lockdep_init_map_crosslock. The cross-release bits have been removed, lockdep_init_map_crosslock() is a leftover. Remove lockdep_init_map_crosslock. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Waiman Long Acked-by: Waiman Long Link: https://lore.kernel.org/r/20220311164457.46461-1-bigeasy@linutronix.de Link: https://lore.kernel.org/r/YqITgY+2aPITu96z@linutronix.de --- include/linux/lockdep.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1f1099dac3f0..1023f349af71 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -435,7 +435,6 @@ enum xhlock_context_t { XHLOCK_CTX_NR, }; -#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. * Note that _name must not be NULL. -- cgit v1.2.3 From eca0edaf6caa66b6eb26277a7dce5d7296cedfca Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Fri, 3 Feb 2023 09:22:21 -0300 Subject: module.h: Document klp_modinfo struct using kdoc Previously the documentation existed only in Documentation/livepatch directory. Signed-off-by: Marcos Paulo de Souza Signed-off-by: Luis Chamberlain --- include/linux/module.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 8c5909c0076c..6449ea59c074 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -352,6 +352,14 @@ struct mod_kallsyms { }; #ifdef CONFIG_LIVEPATCH +/** + * struct klp_modinfo - Elf information preserved from the livepatch module + * + * @hdr: Elf header + * @sechdrs: Section header table + * @secstrings: String table for the section headers + * @symndx: The symbol table section index + */ struct klp_modinfo { Elf_Ehdr hdr; Elf_Shdr *sechdrs; -- cgit v1.2.3 From 20f6d4f2a474fc2dc502358dcee3c9b18304ec1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 4 Feb 2023 05:47:03 +0000 Subject: of: make of_node_ktype constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.") the driver core allows the usage of const struct kobj_type. Take advantage of this to constify the structure definition to prevent modification at runtime. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20230204-kobj_type-of-v1-1-5910c8ecb7a3@weissschuh.net Signed-off-by: Rob Herring --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8b9f94386dc3..8bb348666709 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -101,7 +101,7 @@ struct of_reconfig_data { }; /* initialize a node */ -extern struct kobj_type of_node_ktype; +extern const struct kobj_type of_node_ktype; extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { -- cgit v1.2.3 From 7390609b0121a1b982c5ecdfcd72dc328e5784ee Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:42 +0000 Subject: net: add helper eth_addr_add() Add a helper to add an offset to a ethernet address. This comes in handy if you have a base ethernet address for multiple interfaces. Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Acked-by: Jakub Kicinski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/etherdevice.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index a541f0c4f146..224645f17c33 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -507,6 +507,20 @@ static inline void eth_addr_inc(u8 *addr) u64_to_ether_addr(u, addr); } +/** + * eth_addr_add() - Add (or subtract) an offset to/from the given MAC address. + * + * @offset: Offset to add. + * @addr: Pointer to a six-byte array containing Ethernet address to increment. + */ +static inline void eth_addr_add(u8 *addr, long offset) +{ + u64 u = ether_addr_to_u64(addr); + + u += offset; + u64_to_ether_addr(u, addr); +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure -- cgit v1.2.3 From c5d264d4b527c96ae8903376a4b195df47b05203 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:43 +0000 Subject: of: base: add of_parse_phandle_with_optional_args() Add a new variant of the of_parse_phandle_with_args() which treats the cells name as optional. If it's missing, it is assumed that the phandle has no arguments. Up until now, a nvmem node didn't have any arguments, so all the device trees haven't any '#*-cells' property. But there is a need for an additional argument for the phandle, for which we need a '#*-cells' property. Therefore, we need to support nvmem nodes with and without this property. Signed-off-by: Michael Walle Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-10-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/of.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8b9f94386dc3..98c252d2d851 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1008,6 +1008,31 @@ static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, index, out_args); } +/** + * of_parse_phandle_with_optional_args() - Find a node pointed by phandle in a list + * @np: pointer to a device tree node containing a list + * @list_name: property name that contains a list + * @cells_name: property name that specifies phandles' arguments count + * @index: index of a phandle to parse out + * @out_args: optional pointer to output arguments structure (will be filled) + * + * Same as of_parse_phandle_with_args() except that if the cells_name property + * is not found, cell_count of 0 is assumed. + * + * This is used to useful, if you have a phandle which didn't have arguments + * before and thus doesn't have a '#*-cells' property but is now migrated to + * having arguments while retaining backwards compatibility. + */ +static inline int of_parse_phandle_with_optional_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int index, + struct of_phandle_args *out_args) +{ + return __of_parse_phandle_with_args(np, list_name, cells_name, + 0, index, out_args); +} + /** * of_property_count_u8_elems - Count the number of u8 elements in a property * -- cgit v1.2.3 From 5d8e6e6c10a3d37486d263b16ddc15991a7e4a88 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:46 +0000 Subject: nvmem: core: add an index parameter to the cell Sometimes a cell can represend multiple values. For example, a base ethernet address stored in the NVMEM can be expanded into multiple discreet ones by adding an offset. For this use case, introduce an index parameter which is then used to distiguish between values. This parameter will then be passed to the post process hook which can then use it to create different values during reading. At the moment, there is only support for the device tree path. You can add the index to the phandle, e.g. &net { nvmem-cells = <&base_mac_address 2>; nvmem-cell-names = "mac-address"; }; &nvmem_provider { base_mac_address: base-mac-address@0 { #nvmem-cell-cells = <1>; reg = <0 6>; }; }; Signed-off-by: Michael Walle Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-13-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index bb15c9234e21..55181d837969 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -20,8 +20,8 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, void *val, size_t bytes); /* used for vendor specific post processing of cell data */ -typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, unsigned int offset, - void *buf, size_t bytes); +typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, int index, + unsigned int offset, void *buf, size_t bytes); enum nvmem_type { NVMEM_TYPE_UNKNOWN = 0, -- cgit v1.2.3 From fbd03d27776c6121a483921601418e3c8f0ff37e Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:47 +0000 Subject: nvmem: core: move struct nvmem_cell_info to nvmem-provider.h struct nvmem_cell_info is used to describe a cell. Thus this should really be in the nvmem-provider's header. There are two (unused) nvmem access methods which use the nvmem_cell_info to describe the cell to be accesses. One can argue, that they will create a cell before accessing, thus they are both a provider and a consumer. struct nvmem_cell_info will get used more and more by nvmem-providers, don't force them to also include the consumer header, although they are not. Signed-off-by: Michael Walle Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-14-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 10 +--------- include/linux/nvmem-provider.h | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 980f9c9ac0bc..1f62f7ba71ca 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -18,15 +18,7 @@ struct device_node; /* consumer cookie */ struct nvmem_cell; struct nvmem_device; - -struct nvmem_cell_info { - const char *name; - unsigned int offset; - unsigned int bytes; - unsigned int bit_offset; - unsigned int nbits; - struct device_node *np; -}; +struct nvmem_cell_info; /** * struct nvmem_cell_lookup - cell lookup entry diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 55181d837969..a953a3a59535 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -14,7 +14,6 @@ #include struct nvmem_device; -struct nvmem_cell_info; typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, void *val, size_t bytes); typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, @@ -47,6 +46,24 @@ struct nvmem_keepout { unsigned char value; }; +/** + * struct nvmem_cell_info - NVMEM cell description + * @name: Name. + * @offset: Offset within the NVMEM device. + * @bytes: Length of the cell. + * @bit_offset: Bit offset if cell is smaller than a byte. + * @nbits: Number of bits. + * @np: Optional device_node pointer. + */ +struct nvmem_cell_info { + const char *name; + unsigned int offset; + unsigned int bytes; + unsigned int bit_offset; + unsigned int nbits; + struct device_node *np; +}; + /** * struct nvmem_config - NVMEM device configuration * -- cgit v1.2.3 From 2ded6830d376d5e7bf43d59f7f7fdf1a59abc676 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:49 +0000 Subject: nvmem: core: add nvmem_add_one_cell() Add a new function to add exactly one cell. This will be used by the nvmem layout drivers to add custom cells. In contrast to the nvmem_add_cells(), this has the advantage that we don't have to assemble a list of cells on runtime. Signed-off-by: Michael Walle Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-16-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index a953a3a59535..0262b86194eb 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -153,6 +153,9 @@ struct nvmem_device *devm_nvmem_register(struct device *dev, void nvmem_add_cell_table(struct nvmem_cell_table *table); void nvmem_del_cell_table(struct nvmem_cell_table *table); +int nvmem_add_one_cell(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info); + #else static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c) @@ -170,6 +173,11 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c) static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {} static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {} +static inline int nvmem_add_one_cell(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_NVMEM */ #endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ -- cgit v1.2.3 From 15a7cf8caabee4613764abe7814dd3162cb64137 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Mon, 6 Feb 2023 16:17:57 +0000 Subject: usb: gadget: configfs: Support arbitrary string descriptors Add a framework to allow users to define arbitrary string descriptors for a USB Gadget. This is modelled as a new type of config item rather than as hardcoded attributes so as to be as flexible as possible. Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20230206161802.892954-7-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dc3092cea99e..00750f7020f3 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,6 +15,7 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H +#include #include #include #include @@ -821,6 +822,16 @@ int usb_gadget_get_string(const struct usb_gadget_strings *table, int id, u8 *bu /* check if the given language identifier is valid */ bool usb_validate_langid(u16 langid); +struct gadget_string { + struct config_item item; + struct list_head list; + char string[USB_MAX_STRING_LEN]; + struct usb_string usb_string; +}; + +#define to_gadget_string(str_item)\ +container_of(str_item, struct gadget_string, item) + /*-------------------------------------------------------------------------*/ /* utility to simplify managing config descriptors */ -- cgit v1.2.3 From c033563220e0f7a82f4ae8d698284cced94fd6cf Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Mon, 6 Feb 2023 16:17:58 +0000 Subject: usb: gadget: configfs: Attach arbitrary strings to cdev Attach any arbitrary strings that are defined to the composite dev. We handle the old-style manufacturer, product and serialnumbers strings in the same function for simplicity. Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20230206161802.892954-8-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 7ef8cea67f50..608dc962748b 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -494,6 +494,7 @@ struct usb_composite_dev { struct usb_composite_driver *driver; u8 next_string_id; char *def_manufacturer; + struct usb_string *usb_strings; /* the gadget driver won't enable the data pullup * while the deactivation count is nonzero. -- cgit v1.2.3 From 2e0a547164b1384a87fd3500a01297222b0971b0 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 3 Feb 2023 16:35:14 -0500 Subject: fanotify: Ensure consistent variable type for response The user space API for the response variable is __u32. This patch makes sure that the whole path through the kernel uses u32 so that there is no sign extension or truncation of the user space response. Suggested-by: Steve Grubb Link: https://lore.kernel.org/r/12617626.uLZWGnKmhe@x2 Signed-off-by: Richard Guy Briggs Acked-by: Paul Moore Tested-by: Steve Grubb Acked-by: Steve Grubb Signed-off-by: Jan Kara Message-Id: <3778cb0b3501bc4e686ba7770b20eb9ab0506cf4.1675373475.git.rgb@redhat.com> --- include/linux/audit.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 3608992848d3..d6b7d0c7ce43 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -416,7 +416,7 @@ extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_openat2_how(struct open_how *how); extern void __audit_log_kern_module(char *name); -extern void __audit_fanotify(unsigned int response); +extern void __audit_fanotify(u32 response); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, @@ -523,7 +523,7 @@ static inline void audit_log_kern_module(char *name) __audit_log_kern_module(name); } -static inline void audit_fanotify(unsigned int response) +static inline void audit_fanotify(u32 response) { if (!audit_dummy_context()) __audit_fanotify(response); @@ -679,7 +679,7 @@ static inline void audit_log_kern_module(char *name) { } -static inline void audit_fanotify(unsigned int response) +static inline void audit_fanotify(u32 response) { } static inline void audit_tk_injoffset(struct timespec64 offset) -- cgit v1.2.3 From 70529a199574c15a40f46b14256633b02ba10ca2 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 3 Feb 2023 16:35:15 -0500 Subject: fanotify: define struct members to hold response decision context This patch adds a flag, FAN_INFO and an extensible buffer to provide additional information about response decisions. The buffer contains one or more headers defining the information type and the length of the following information. The patch defines one additional information type, FAN_RESPONSE_INFO_AUDIT_RULE, to audit a rule number. This will allow for the creation of other information types in the future if other users of the API identify different needs. The kernel can be tested if it supports a given info type by supplying the complete info extension but setting fd to FAN_NOFD. It will return the expected size but not issue an audit record. Suggested-by: Steve Grubb Link: https://lore.kernel.org/r/2745105.e9J7NaK4W3@x2 Suggested-by: Jan Kara Link: https://lore.kernel.org/r/20201001101219.GE17860@quack2.suse.cz Tested-by: Steve Grubb Acked-by: Steve Grubb Signed-off-by: Richard Guy Briggs Signed-off-by: Jan Kara Message-Id: <10177cfcae5480926b7176321a28d9da6835b667.1675373475.git.rgb@redhat.com> --- include/linux/fanotify.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 8ad743def6f3..4f1c4f603118 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -122,6 +122,11 @@ #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) +/* These masks check for invalid bits in permission responses. */ +#define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY) +#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO) +#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS) + /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS #undef FAN_ALL_INIT_FLAGS -- cgit v1.2.3 From 032bffd494e3924cc8b854b696ef9b5b7396b883 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 3 Feb 2023 16:35:16 -0500 Subject: fanotify,audit: Allow audit to use the full permission event response This patch passes the full response so that the audit function can use all of it. The audit function was updated to log the additional information in the AUDIT_FANOTIFY record. Currently the only type of fanotify info that is defined is an audit rule number, but convert it to hex encoding to future-proof the field. Hex encoding suggested by Paul Moore . The {subj,obj}_trust values are {0,1,2}, corresponding to no, yes, unknown. Sample records: type=FANOTIFY msg=audit(1600385147.372:590): resp=2 fan_type=1 fan_info=3137 subj_trust=3 obj_trust=5 type=FANOTIFY msg=audit(1659730979.839:284): resp=1 fan_type=0 fan_info=0 subj_trust=2 obj_trust=2 Suggested-by: Steve Grubb Link: https://lore.kernel.org/r/3075502.aeNJFYEL58@x2 Tested-by: Steve Grubb Acked-by: Steve Grubb Signed-off-by: Richard Guy Briggs Signed-off-by: Jan Kara Message-Id: --- include/linux/audit.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index d6b7d0c7ce43..31086a72e32a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -14,6 +14,7 @@ #include #include #include +#include #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -416,7 +417,7 @@ extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_openat2_how(struct open_how *how); extern void __audit_log_kern_module(char *name); -extern void __audit_fanotify(u32 response); +extern void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, @@ -523,10 +524,10 @@ static inline void audit_log_kern_module(char *name) __audit_log_kern_module(name); } -static inline void audit_fanotify(u32 response) +static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { if (!audit_dummy_context()) - __audit_fanotify(response); + __audit_fanotify(response, friar); } static inline void audit_tk_injoffset(struct timespec64 offset) @@ -679,7 +680,7 @@ static inline void audit_log_kern_module(char *name) { } -static inline void audit_fanotify(u32 response) +static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { } static inline void audit_tk_injoffset(struct timespec64 offset) -- cgit v1.2.3 From 585e351ff359c032ea7ab48d999b252ba09f8051 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Sat, 4 Feb 2023 17:15:02 -0800 Subject: perf: RISC-V: Define helper functions expose hpm counter width and count KVM module needs to know how many hardware counters and the counter width that the platform supports. Otherwise, it will not be able to show optimal value of virtual counters to the guest. The virtual hardware counters also need to have the same width as the logical hardware counters for simplicity. However, there shouldn't be mapping between virtual hardware counters and logical hardware counters. As we don't support hetergeneous harts or counters with different width as of now, the implementation relies on the counter width of the first available programmable counter. Reviewed-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Atish Patra Signed-off-by: Anup Patel --- include/linux/perf/riscv_pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index e17e86ad6f3a..a1c3f7771481 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -73,6 +73,9 @@ void riscv_pmu_legacy_skip_init(void); static inline void riscv_pmu_legacy_skip_init(void) {}; #endif struct riscv_pmu *riscv_pmu_alloc(void); +#ifdef CONFIG_RISCV_PMU_SBI +int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); +#endif #endif /* CONFIG_RISCV_PMU */ -- cgit v1.2.3 From 8929283a687bb4b71ec9d3f1e827aecf829c6b1a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Sat, 4 Feb 2023 17:15:03 -0800 Subject: perf: RISC-V: Improve privilege mode filtering for perf Currently, the host driver doesn't have any method to identify if the requested perf event is from kvm or bare metal. As KVM runs in HS mode, there are no separate hypervisor privilege mode to distinguish between the attributes for guest/host. Improve the privilege mode filtering by using the event specific config1 field. Reviewed-by: Andrew Jones Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Anup Patel --- include/linux/perf/riscv_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index a1c3f7771481..43fc892aa7d9 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -26,6 +26,8 @@ #define RISCV_PMU_STOP_FLAG_RESET 1 +#define RISCV_PMU_CONFIG1_GUEST_EVENTS 0x1 + struct cpu_hw_events { /* currently enabled events */ int n_events; -- cgit v1.2.3 From a9c4bdd505630469f93f5efedfc7a9ca254996c8 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 30 Jan 2023 15:54:09 +0800 Subject: tracing: Acquire buffer from temparary trace sequence there is one dwc3 trace event declare as below, DECLARE_EVENT_CLASS(dwc3_log_event, TP_PROTO(u32 event, struct dwc3 *dwc), TP_ARGS(event, dwc), TP_STRUCT__entry( __field(u32, event) __field(u32, ep0state) __dynamic_array(char, str, DWC3_MSG_MAX) ), TP_fast_assign( __entry->event = event; __entry->ep0state = dwc->ep0state; ), TP_printk("event (%08x): %s", __entry->event, dwc3_decode_event(__get_str(str), DWC3_MSG_MAX, __entry->event, __entry->ep0state)) ); the problem is when trace function called, it will allocate up to DWC3_MSG_MAX bytes from trace event buffer, but never fill the buffer during fast assignment, it only fill the buffer when output function are called, so this means if output function are not called, the buffer will never used. add __get_buf(len) which acquiree buffer from iter->tmp_seq when trace output function called, it allow user write string to acquired buffer. the mentioned dwc3 trace event will changed as below, DECLARE_EVENT_CLASS(dwc3_log_event, TP_PROTO(u32 event, struct dwc3 *dwc), TP_ARGS(event, dwc), TP_STRUCT__entry( __field(u32, event) __field(u32, ep0state) ), TP_fast_assign( __entry->event = event; __entry->ep0state = dwc->ep0state; ), TP_printk("event (%08x): %s", __entry->event, dwc3_decode_event(__get_buf(DWC3_MSG_MAX), DWC3_MSG_MAX, __entry->event, __entry->ep0state)) );. Link: https://lore.kernel.org/linux-trace-kernel/1675065249-23368-1-git-send-email-quic_linyyuan@quicinc.com Cc: Masami Hiramatsu Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_seq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 0c4c7587d6c3..6be92bf559fe 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -95,6 +95,7 @@ extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); +char *trace_seq_acquire(struct trace_seq *s, unsigned int len); #else /* CONFIG_TRACING */ static inline __printf(2, 3) @@ -139,6 +140,10 @@ static inline int trace_seq_path(struct trace_seq *s, const struct path *path) { return 0; } +static inline char *trace_seq_acquire(struct trace_seq *s, unsigned int len) +{ + return NULL; +} #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_SEQ_H */ -- cgit v1.2.3 From d503b8f7474fe7ac616518f7fc49773cbab49f36 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 7 Feb 2023 12:28:52 -0500 Subject: tracing: Add trace_array_puts() to write into instance Add a generic trace_array_puts() that can be used to "trace_puts()" into an allocated trace_array instance. This is just another variant of trace_array_printk(). Link: https://lkml.kernel.org/r/20230207173026.584717290@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- include/linux/trace.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index 80ffda871749..2a70a447184c 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -33,6 +33,18 @@ struct trace_array; int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); +/** + * trace_array_puts - write a constant string into the trace buffer. + * @tr: The trace array to write to + * @str: The constant string to write + */ +#define trace_array_puts(tr, str) \ + ({ \ + str ? __trace_array_puts(tr, _THIS_IP_, str, strlen(str)) : -1; \ + }) +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size); + void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, -- cgit v1.2.3 From 115f1a5c42bdad9a9ea356fc0b4a39ec7537947f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Feb 2023 17:31:00 +0000 Subject: net: add SKB_HEAD_ALIGN() helper We have many places using this expression: SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) Use of SKB_HEAD_ALIGN() will allow to clean them. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Paolo Abeni Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1fa95b916342..c3df3b55da97 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -255,6 +255,14 @@ #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +/* For X bytes available in skb->head, what is the minimal + * allocation needed, knowing struct skb_shared_info needs + * to be aligned. + */ +#define SKB_HEAD_ALIGN(X) (SKB_DATA_ALIGN(X) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + #define SKB_MAX_ORDER(X, ORDER) \ SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) -- cgit v1.2.3 From 26759bee43ea7a29fcf9b06d476f7d02ec990ee3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 6 Feb 2023 19:00:06 +0100 Subject: net, xdp: Add missing xdp_features description Add missing xdp_features field description in the struct net_device documentation. This patch fix the following warning: [...] ./include/linux/netdevice.h:2375: warning: Function parameter or member 'xdp_features' not described in 'net_device' [...] Fixes: d3d854fd6a1d ("netdev-genl: create a simple family for netdev stuff") Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/7878544903d855b49e838c9d59f715bde0b5e63b.1675705948.git.lorenzo@kernel.org --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0f7967591288..9bb11da36ef0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1819,6 +1819,7 @@ enum netdev_ml_priv_type { * of Layer 2 headers. * * @flags: Interface flags (a la BSD) + * @xdp_features: XDP capability supported by the device * @priv_flags: Like 'flags' but invisible to userspace, * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) -- cgit v1.2.3 From 67257cba905d6c5660851968b55b7c5c3dc1ecc5 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 5 Jan 2023 10:03:46 +0200 Subject: net/mlx5: Remove redundant health work lock Commit 90e7cb78b815 ("net/mlx5: fix missing mutex_unlock in mlx5_fw_fatal_reporter_err_work()") introduced another checking of MLX5_DROP_HEALTH_NEW_WORK. At this point, the first check of MLX5_DROP_HEALTH_NEW_WORK is redundant and so is the lock that protects it. Remove the lock and rename MLX5_DROP_HEALTH_NEW_WORK to reflect these changes. Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cd529e051b4d..91e8160ed087 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -430,8 +430,6 @@ struct mlx5_core_health { u8 synd; u32 fatal_error; u32 crdump_size; - /* wq spinlock to synchronize draining */ - spinlock_t wq_lock; struct workqueue_struct *wq; unsigned long flags; struct work_struct fatal_report_work; -- cgit v1.2.3 From 7dfcd110a4584a4354f6cd0496f22da6db78cb17 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 12 Jan 2023 14:33:00 +0200 Subject: net/mlx5: fw_tracer, Add support for strings DB update event In case a new string DB is added to the FW, the FW publishes an event notifying the strings DB have updated. Add support in driver for handling this event. Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bc531bd9804f..70b3eacf2bbb 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -367,6 +367,7 @@ enum mlx5_driver_event { enum { MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0, MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1, + MLX5_TRACER_SUBTYPE_STRINGS_DB_UPDATE = 0x2, }; enum { -- cgit v1.2.3 From 5bad5d55d884d57acba92a3309cde4cbb26dfefc Mon Sep 17 00:00:00 2001 From: Wang Yong Date: Thu, 2 Feb 2023 08:13:42 +0000 Subject: KVM: update code comment in struct kvm_vcpu Commit c5b077549136 ("KVM: Convert the kvm->vcpus array to a xarray") changed kvm->vcpus array to a xarray, so update the code comment of kvm_vcpu->vcpu_idx accordingly. Signed-off-by: Wang Yong Link: https://lore.kernel.org/r/20230202081342.856687-1-yongw.kernel@gmail.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a7d6a6111f5e..8ada23756b0e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -324,7 +324,7 @@ struct kvm_vcpu { #endif int cpu; int vcpu_id; /* id given by userspace at creation */ - int vcpu_idx; /* index in kvm->vcpus array */ + int vcpu_idx; /* index into kvm->vcpu_array */ int ____srcu_idx; /* Don't use this directly. You've been warned. */ #ifdef CONFIG_PROVE_RCU int srcu_depth; -- cgit v1.2.3 From 43245117806ff8914e37327b610fc08b5ddedc91 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 20 Jan 2023 20:24:28 -0800 Subject: lib/find: introduce find_nth_and_andnot_bit In the following patches the function is used to implement in-place bitmaps traversing without storing intermediate result in temporary bitmaps. Signed-off-by: Yury Norov Acked-by: Tariq Toukan Reviewed-by: Jacob Keller Reviewed-by: Peter Lafreniere Signed-off-by: Jakub Kicinski --- include/linux/find.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/find.h b/include/linux/find.h index ccaf61a0f5fd..4647864a5ffd 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -22,6 +22,9 @@ unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long unsigned long size, unsigned long n); unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n); +unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + const unsigned long *addr3, unsigned long size, + unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); @@ -255,6 +258,36 @@ unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned lon return __find_nth_andnot_bit(addr1, addr2, size, n); } +/** + * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions, + * excluding those set in 3rd region + * @addr1: The 1st address to start the search at + * @addr2: The 2nd address to start the search at + * @addr3: The 3rd address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static __always_inline +unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + const unsigned long *addr3, + unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & (~*addr3) & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_and_andnot_bit(addr1, addr2, addr3, size, n); +} + #ifndef find_first_and_bit /** * find_first_and_bit - find the first set bit in both memory regions -- cgit v1.2.3 From 62f4386e564d31c7d0ed7d835843e2685f99ae71 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 20 Jan 2023 20:24:29 -0800 Subject: cpumask: introduce cpumask_nth_and_andnot Introduce cpumask_nth_and_andnot() based on find_nth_and_andnot_bit(). It's used in the following patch to traverse cpumasks without storing intermediate result in temporary cpumask. Signed-off-by: Yury Norov Acked-by: Tariq Toukan Reviewed-by: Jacob Keller Reviewed-by: Peter Lafreniere Signed-off-by: Jakub Kicinski --- include/linux/cpumask.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c2aa0aa26b45..7b16aede7ac5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -391,6 +391,26 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, nr_cpumask_bits, cpumask_check(cpu)); } +/** + * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd. + * @srcp1: the cpumask pointer + * @srcp2: the cpumask pointer + * @srcp3: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static __always_inline +unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, + const struct cpumask *srcp2, + const struct cpumask *srcp3) +{ + return find_nth_and_andnot_bit(cpumask_bits(srcp1), + cpumask_bits(srcp2), + cpumask_bits(srcp3), + nr_cpumask_bits, cpumask_check(cpu)); +} + #define CPU_BITS_NONE \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ -- cgit v1.2.3 From cd7f55359c90a4108e6528e326b8623fce1ad72a Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 20 Jan 2023 20:24:30 -0800 Subject: sched: add sched_numa_find_nth_cpu() The function finds Nth set CPU in a given cpumask starting from a given node. Leveraging the fact that each hop in sched_domains_numa_masks includes the same or greater number of CPUs than the previous one, we can use binary search on hops instead of linear walk, which makes the overall complexity of O(log n) in terms of number of cpumask_weight() calls. Signed-off-by: Yury Norov Acked-by: Tariq Toukan Reviewed-by: Jacob Keller Reviewed-by: Peter Lafreniere Signed-off-by: Jakub Kicinski --- include/linux/topology.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 4564faafd0e1..72f264575698 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -245,5 +245,13 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu) return cpumask_of_node(cpu_to_node(cpu)); } +#ifdef CONFIG_NUMA +int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node); +#else +static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) +{ + return cpumask_nth(cpu, cpus); +} +#endif /* CONFIG_NUMA */ #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.3 From 9feae65845f7b16376716fe70b7d4b9bf8721848 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 20 Jan 2023 20:24:33 -0800 Subject: sched/topology: Introduce sched_numa_hop_mask() Tariq has pointed out that drivers allocating IRQ vectors would benefit from having smarter NUMA-awareness - cpumask_local_spread() only knows about the local node and everything outside is in the same bucket. sched_domains_numa_masks is pretty much what we want to hand out (a cpumask of CPUs reachable within a given distance budget), introduce sched_numa_hop_mask() to export those cpumasks. Link: http://lore.kernel.org/r/20220728191203.4055-1-tariqt@nvidia.com Signed-off-by: Valentin Schneider Reviewed-by: Yury Norov Signed-off-by: Yury Norov Signed-off-by: Jakub Kicinski --- include/linux/topology.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 72f264575698..344c2362755a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -247,11 +247,18 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu) #ifdef CONFIG_NUMA int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node); +extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops); #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { return cpumask_nth(cpu, cpus); } + +static inline const struct cpumask * +sched_numa_hop_mask(unsigned int node, unsigned int hops) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.3 From 06ac01721f7d07da722abe0ec6f147b90bfc8c77 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 20 Jan 2023 20:24:34 -0800 Subject: sched/topology: Introduce for_each_numa_hop_mask() The recently introduced sched_numa_hop_mask() exposes cpumasks of CPUs reachable within a given distance budget, wrap the logic for iterating over all (distance, mask) values inside an iterator macro. Signed-off-by: Valentin Schneider Reviewed-by: Yury Norov Signed-off-by: Yury Norov Signed-off-by: Jakub Kicinski --- include/linux/topology.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 344c2362755a..fea32377f7c7 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -261,4 +261,22 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) } #endif /* CONFIG_NUMA */ +/** + * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance + * from a given node. + * @mask: the iteration variable. + * @node: the NUMA node to start the search from. + * + * Requires rcu_lock to be held. + * + * Yields cpu_online_mask for @node == NUMA_NO_NODE. + */ +#define for_each_numa_hop_mask(mask, node) \ + for (unsigned int __hops = 0; \ + mask = (node != NUMA_NO_NODE || __hops) ? \ + sched_numa_hop_mask(node, __hops) : \ + cpu_online_mask, \ + !IS_ERR_OR_NULL(mask); \ + __hops++) + #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.3 From c3bdbaea654d8df39112de33037106134a520dc7 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 22 Jan 2023 21:09:40 +0200 Subject: net/mlx5: Store page counters in a single array Currently, an independent page counter is used for tracking memory usage for each function type such as VF, PF and host PF (DPU). For better code-readibilty, use a single array that stores the number of allocated memory pages for each function type. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 76ef2e4fde38..82a9bd4274b8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -573,6 +573,13 @@ struct mlx5_debugfs_entries { struct dentry *lag_debugfs; }; +enum mlx5_func_type { + MLX5_PF, + MLX5_VF, + MLX5_HOST_PF, + MLX5_FUNC_TYPE_NUM, +}; + struct mlx5_ft_pool; struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ @@ -583,11 +590,10 @@ struct mlx5_priv { struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; struct xarray page_root_xa; - u32 fw_pages; atomic_t reg_pages; struct list_head free_list; - u32 vfs_pages; - u32 host_pf_pages; + u32 fw_pages; + u32 page_counters[MLX5_FUNC_TYPE_NUM]; u32 fw_pages_alloc_failed; u32 give_pages_dropped; u32 reclaim_pages_discard; -- cgit v1.2.3 From 9965bbebae59b3563a4d95e4aed121e8965dfdc2 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 22 Jan 2023 23:24:56 +0200 Subject: net/mlx5: Expose SF firmware pages counter Currently, each core device has VF pages counter which stores number of fw pages used by its VFs and SFs. The current design led to a hang when performing firmware reset on DPU, where the DPU PFs stalled in sriov unload flow due to waiting on release of SFs pages instead of waiting on only VFs pages. Thus, Add a separate counter for SF firmware pages, which will prevent the stall scenario described above. Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 82a9bd4274b8..333c1fec72f8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -576,6 +576,7 @@ struct mlx5_debugfs_entries { enum mlx5_func_type { MLX5_PF, MLX5_VF, + MLX5_SF, MLX5_HOST_PF, MLX5_FUNC_TYPE_NUM, }; -- cgit v1.2.3 From 097d7c1fcb8d4b52c62a36f94b8f18bc21a24934 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Feb 2023 22:21:07 -0800 Subject: fscrypt: clean up fscrypt_add_test_dummy_key() Now that fscrypt_add_test_dummy_key() is only called by setup_file_encryption_key() and not by the individual filesystems, un-export it. Also change its prototype to take the fscrypt_key_specifier directly, as the caller already has it. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20230208062107.199831-6-ebiggers@kernel.org --- include/linux/fscrypt.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 4f5f8a651213..44848d870046 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -309,8 +309,6 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) /* keyring.c */ void fscrypt_destroy_keyring(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); -int fscrypt_add_test_dummy_key(struct super_block *sb, - const struct fscrypt_dummy_policy *dummy_policy); int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); @@ -530,13 +528,6 @@ static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg) return -EOPNOTSUPP; } -static inline int -fscrypt_add_test_dummy_key(struct super_block *sb, - const struct fscrypt_dummy_policy *dummy_policy) -{ - return 0; -} - static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; -- cgit v1.2.3 From a8f1a19d27ef9b13574195ae1571158529473541 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 7 Feb 2023 11:52:11 +0100 Subject: net: micrel: Add support for lan8841 PHY The LAN8841 is completely integrated triple-speed (10BASE-T/ 100BASE-TX/ 1000BASE-T) Ethernet physical layer transceivers for transmission and reception of data on standard CAT-5, as well as CAT-5e and CAT-6, unshielded twisted pair (UTP) cables. The LAN8841 offers the industry-standard GMII/MII as well as the RGMII. Some of the features of the PHY are: - Wake on LAN - Auto-MDIX - IEEE 1588-2008 (V2) - LinkMD Capable diagnosis Currently the patch offers support only for link configuration. Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 771e050883db..8bef1ab62bba 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,6 +31,7 @@ #define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_LAN8814 0x00221660 #define PHY_ID_LAN8804 0x00221670 +#define PHY_ID_LAN8841 0x00221650 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From 67cad5c67019c38126b749621665b6723d3ae7e6 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 6 Feb 2023 17:41:57 -0800 Subject: driver core: fw_devlink: Add DL_FLAG_CYCLE support to device links fw_devlink uses DL_FLAG_SYNC_STATE_ONLY device link flag for two purposes: 1. To allow a parent device to proxy its child device's dependency on a supplier so that the supplier doesn't get its sync_state() callback before the child device/consumer can be added and probed. In this usage scenario, we need to ignore cycles for ensure correctness of sync_state() callbacks. 2. When there are dependency cycles in firmware, we don't know which of those dependencies are valid. So, we have to ignore them all wrt probe ordering while still making sure the sync_state() callbacks come correctly. However, when detecting dependency cycles, there can be multiple dependency cycles between two devices that we need to detect. For example: A -> B -> A and A -> C -> B -> A. To detect multiple cycles correct, we need to be able to differentiate DL_FLAG_SYNC_STATE_ONLY device links used for (1) vs (2) above. To allow this differentiation, add a DL_FLAG_CYCLE that can be use to mark use case (2). We can then use the DL_FLAG_CYCLE to decide which DL_FLAG_SYNC_STATE_ONLY device links to follow when looking for dependency cycles. Fixes: 2de9d8e0d2fe ("driver core: fw_devlink: Improve handling of cyclic dependencies") Signed-off-by: Saravana Kannan Tested-by: Colin Foster Tested-by: Sudeep Holla Tested-by: Douglas Anderson Tested-by: Geert Uytterhoeven Tested-by: Luca Weiss # qcom/sm7225-fairphone-fp4 Link: https://lore.kernel.org/r/20230207014207.1678715-6-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 90aaf308c259..1508e637bb26 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -328,6 +328,7 @@ enum device_link_state { #define DL_FLAG_MANAGED BIT(6) #define DL_FLAG_SYNC_STATE_ONLY BIT(7) #define DL_FLAG_INFERRED BIT(8) +#define DL_FLAG_CYCLE BIT(9) /** * enum dl_dev_state - Device driver presence tracking information. -- cgit v1.2.3 From 6a6dfdf8b3ff337be5a447e9f4e71969f18370ad Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 6 Feb 2023 17:41:58 -0800 Subject: driver core: fw_devlink: Allow marking a fwnode link as being part of a cycle To improve detection and handling of dependency cycles, we need to be able to mark fwnode links as being part of cycles. fwnode links marked as being part of a cycle should not block their consumers from probing. Fixes: 2de9d8e0d2fe ("driver core: fw_devlink: Improve handling of cyclic dependencies") Signed-off-by: Saravana Kannan Tested-by: Colin Foster Tested-by: Sudeep Holla Tested-by: Douglas Anderson Tested-by: Geert Uytterhoeven Tested-by: Luca Weiss # qcom/sm7225-fairphone-fp4 Link: https://lore.kernel.org/r/20230207014207.1678715-7-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 89b9bdfca925..fdf2ee0285b7 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -18,7 +18,7 @@ struct fwnode_operations; struct device; /* - * fwnode link flags + * fwnode flags * * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. * NOT_DEVICE: The fwnode will never be populated as a struct device. @@ -36,6 +36,7 @@ struct device; #define FWNODE_FLAG_INITIALIZED BIT(2) #define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) #define FWNODE_FLAG_BEST_EFFORT BIT(4) +#define FWNODE_FLAG_VISITED BIT(5) struct fwnode_handle { struct fwnode_handle *secondary; @@ -46,11 +47,19 @@ struct fwnode_handle { u8 flags; }; +/* + * fwnode link flags + * + * CYCLE: The fwnode link is part of a cycle. Don't defer probe. + */ +#define FWLINK_FLAG_CYCLE BIT(0) + struct fwnode_link { struct fwnode_handle *supplier; struct list_head s_hook; struct fwnode_handle *consumer; struct list_head c_hook; + u8 flags; }; /** -- cgit v1.2.3 From cd115c0409f283edde94bd5a9a42dc42bee0aba8 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 6 Feb 2023 17:41:59 -0800 Subject: driver core: fw_devlink: Consolidate device link flag computation Consolidate the code that computes the flags to be used when creating a device link from a fwnode link. Fixes: 2de9d8e0d2fe ("driver core: fw_devlink: Improve handling of cyclic dependencies") Signed-off-by: Saravana Kannan Tested-by: Colin Foster Tested-by: Sudeep Holla Tested-by: Douglas Anderson Tested-by: Geert Uytterhoeven Tested-by: Luca Weiss # qcom/sm7225-fairphone-fp4 Link: https://lore.kernel.org/r/20230207014207.1678715-8-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index fdf2ee0285b7..5700451b300f 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -207,7 +207,6 @@ static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; } -extern u32 fw_devlink_get_flags(void); extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); -- cgit v1.2.3 From 65db3d8b5231bd430c12150258125aca155aea97 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 7 Feb 2023 21:15:01 +0100 Subject: can: bittiming: can_calc_bittiming(): add missing parameter to no-op function In commit 286c0e09e8e0 ("can: bittiming: can_changelink() pass extack down callstack") a new parameter was added to can_calc_bittiming(), however the static inline no-op (which is used if CONFIG_CAN_CALC_BITTIMING is disabled) wasn't converted. Add the new parameter to the static inline no-op of can_calc_bittiming(). Fixes: 286c0e09e8e0 ("can: bittiming: can_changelink() pass extack down callstack") Reported-by: kernel test robot Link: https://lore.kernel.org/20230207201734.2905618-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 6cb2ae308e3f..9b8a9c39614b 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -124,7 +124,7 @@ void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc) + const struct can_bittiming_const *btc, struct netlink_ext_ack *extack) { netdev_err(dev, "bit-timing calculation not available\n"); return -EINVAL; -- cgit v1.2.3 From 25b84002afb9dc9a91a7ea67166879c13ad82422 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 22:07:49 +0000 Subject: arm64: Support Clang UBSAN trap codes for better reporting When building with CONFIG_UBSAN_TRAP=y on arm64, Clang encodes the UBSAN check (handler) type in the esr. Extract this and actually report these traps as coming from the specific UBSAN check that tripped. Before: Internal error: BRK handler: 00000000f20003e8 [#1] PREEMPT SMP After: Internal error: UBSAN: shift out of bounds: 00000000f2005514 [#1] PREEMPT SMP Acked-by: Mark Rutland Reviewed-by: Ard Biesheuvel Acked-by: Mukesh Ojha Reviewed-by: Fangrui Song Cc: Catalin Marinas Cc: Will Deacon Cc: John Stultz Cc: Yongqin Liu Cc: Sami Tolvanen Cc: Yury Norov Cc: Andrey Konovalov Cc: Marco Elver Cc: linux-arm-kernel@lists.infradead.org Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook --- include/linux/ubsan.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/ubsan.h (limited to 'include/linux') diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h new file mode 100644 index 000000000000..bff7445498de --- /dev/null +++ b/include/linux/ubsan.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UBSAN_H +#define _LINUX_UBSAN_H + +#ifdef CONFIG_UBSAN_TRAP +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); +#endif + +#endif -- cgit v1.2.3 From 04ffde1319a715bd0550ded3580d4ea3bc003776 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 31 Jan 2023 17:37:59 -0800 Subject: uaccess: Add minimum bounds check on kernel buffer size While there is logic about the difference between ksize and usize, copy_struct_from_user() didn't check the size of the destination buffer (when it was known) against ksize. Add this check so there is an upper bounds check on the possible memset() call, otherwise lower bounds checks made by callers will trigger bounds warnings under -Warray-bounds. Seen under GCC 13: In function 'copy_struct_from_user', inlined from 'iommufd_fops_ioctl' at ../drivers/iommu/iommufd/main.c:333:8: ../include/linux/fortify-string.h:59:33: warning: '__builtin_memset' offset [57, 4294967294] is out of the bounds [0, 56] of object 'buf' with type 'union ucmd_buffer' [-Warray-bounds=] 59 | #define __underlying_memset __builtin_memset | ^ ../include/linux/fortify-string.h:453:9: note: in expansion of macro '__underlying_memset' 453 | __underlying_memset(p, c, __fortify_size); \ | ^~~~~~~~~~~~~~~~~~~ ../include/linux/fortify-string.h:461:25: note: in expansion of macro '__fortify_memset_chk' 461 | #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ | ^~~~~~~~~~~~~~~~~~~~ ../include/linux/uaccess.h:334:17: note: in expansion of macro 'memset' 334 | memset(dst + size, 0, rest); | ^~~~~~ ../drivers/iommu/iommufd/main.c: In function 'iommufd_fops_ioctl': ../drivers/iommu/iommufd/main.c:311:27: note: 'buf' declared here 311 | union ucmd_buffer buf; | ^~~ Cc: Christian Brauner Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: Dinh Nguyen Cc: Catalin Marinas Cc: Andrew Morton Cc: Geert Uytterhoeven Cc: Alexander Potapenko Acked-by: Aleksa Sarai Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20230203193523.never.667-kees@kernel.org/ --- include/linux/uaccess.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index afb18f198843..ab9728138ad6 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -329,6 +329,10 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, size_t size = min(ksize, usize); size_t rest = max(ksize, usize) - size; + /* Double check if ksize is larger than a known object size. */ + if (WARN_ON_ONCE(ksize > __builtin_object_size(dst, 1))) + return -E2BIG; + /* Deal with trailing bytes. */ if (usize < ksize) { memset(dst + size, 0, rest); -- cgit v1.2.3 From 3bf90eca76c98c55c975fa817799789b9176f9f3 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Fri, 3 Feb 2023 13:09:52 -0800 Subject: firmware: qcom_scm: Move qcom_scm.h to include/linux/firmware/qcom/ Move include/linux/qcom_scm.h to include/linux/firmware/qcom/qcom_scm.h. This removes 1 of a few remaining Qualcomm-specific headers into a more approciate subdirectory under include/. Suggested-by: Bjorn Andersson Signed-off-by: Elliot Berman Reviewed-by: Guru Das Srinagesh Acked-by: Mukesh Ojha Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230203210956.3580811-1-quic_eberman@quicinc.com --- include/linux/firmware/qcom/qcom_scm.h | 125 +++++++++++++++++++++++++++++++++ include/linux/qcom_scm.h | 125 --------------------------------- 2 files changed, 125 insertions(+), 125 deletions(-) create mode 100644 include/linux/firmware/qcom/qcom_scm.h delete mode 100644 include/linux/qcom_scm.h (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h new file mode 100644 index 000000000000..1e449a5d7f5c --- /dev/null +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2010-2015, 2018-2019 The Linux Foundation. All rights reserved. + * Copyright (C) 2015 Linaro Ltd. + */ +#ifndef __QCOM_SCM_H +#define __QCOM_SCM_H + +#include +#include +#include + +#include + +#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) +#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 +#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 +#define QCOM_SCM_HDCP_MAX_REQ_CNT 5 + +struct qcom_scm_hdcp_req { + u32 addr; + u32 val; +}; + +struct qcom_scm_vmperm { + int vmid; + int perm; +}; + +enum qcom_scm_ocmem_client { + QCOM_SCM_OCMEM_UNUSED_ID = 0x0, + QCOM_SCM_OCMEM_GRAPHICS_ID, + QCOM_SCM_OCMEM_VIDEO_ID, + QCOM_SCM_OCMEM_LP_AUDIO_ID, + QCOM_SCM_OCMEM_SENSORS_ID, + QCOM_SCM_OCMEM_OTHER_OS_ID, + QCOM_SCM_OCMEM_DEBUG_ID, +}; + +enum qcom_scm_sec_dev_id { + QCOM_SCM_MDSS_DEV_ID = 1, + QCOM_SCM_OCMEM_DEV_ID = 5, + QCOM_SCM_PCIE0_DEV_ID = 11, + QCOM_SCM_PCIE1_DEV_ID = 12, + QCOM_SCM_GFX_DEV_ID = 18, + QCOM_SCM_UFS_DEV_ID = 19, + QCOM_SCM_ICE_DEV_ID = 20, +}; + +enum qcom_scm_ice_cipher { + QCOM_SCM_ICE_CIPHER_AES_128_XTS = 0, + QCOM_SCM_ICE_CIPHER_AES_128_CBC = 1, + QCOM_SCM_ICE_CIPHER_AES_256_XTS = 3, + QCOM_SCM_ICE_CIPHER_AES_256_CBC = 4, +}; + +#define QCOM_SCM_PERM_READ 0x4 +#define QCOM_SCM_PERM_WRITE 0x2 +#define QCOM_SCM_PERM_EXEC 0x1 +#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) +#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) + +extern bool qcom_scm_is_available(void); + +extern int qcom_scm_set_cold_boot_addr(void *entry); +extern int qcom_scm_set_warm_boot_addr(void *entry); +extern void qcom_scm_cpu_power_down(u32 flags); +extern int qcom_scm_set_remote_state(u32 state, u32 id); + +struct qcom_scm_pas_metadata { + void *ptr; + dma_addr_t phys; + ssize_t size; +}; + +extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, + size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); +extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, + phys_addr_t size); +extern int qcom_scm_pas_auth_and_reset(u32 peripheral); +extern int qcom_scm_pas_shutdown(u32 peripheral); +extern bool qcom_scm_pas_supported(u32 peripheral); + +extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); + +extern bool qcom_scm_restore_sec_cfg_available(void); +extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, + u32 cp_nonpixel_size); +extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, + unsigned int *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); + +extern bool qcom_scm_ocmem_lock_available(void); +extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size, u32 mode); +extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size); + +extern bool qcom_scm_ice_available(void); +extern int qcom_scm_ice_invalidate_key(u32 index); +extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, + u32 data_unit_size); + +extern bool qcom_scm_hdcp_available(void); +extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, + u32 *resp); + +extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +extern int qcom_scm_lmh_profile_change(u32 profile_id); +extern bool qcom_scm_lmh_dcvsh_available(void); + +#endif diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h deleted file mode 100644 index 1e449a5d7f5c..000000000000 --- a/include/linux/qcom_scm.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2010-2015, 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (C) 2015 Linaro Ltd. - */ -#ifndef __QCOM_SCM_H -#define __QCOM_SCM_H - -#include -#include -#include - -#include - -#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) -#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 -#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 -#define QCOM_SCM_HDCP_MAX_REQ_CNT 5 - -struct qcom_scm_hdcp_req { - u32 addr; - u32 val; -}; - -struct qcom_scm_vmperm { - int vmid; - int perm; -}; - -enum qcom_scm_ocmem_client { - QCOM_SCM_OCMEM_UNUSED_ID = 0x0, - QCOM_SCM_OCMEM_GRAPHICS_ID, - QCOM_SCM_OCMEM_VIDEO_ID, - QCOM_SCM_OCMEM_LP_AUDIO_ID, - QCOM_SCM_OCMEM_SENSORS_ID, - QCOM_SCM_OCMEM_OTHER_OS_ID, - QCOM_SCM_OCMEM_DEBUG_ID, -}; - -enum qcom_scm_sec_dev_id { - QCOM_SCM_MDSS_DEV_ID = 1, - QCOM_SCM_OCMEM_DEV_ID = 5, - QCOM_SCM_PCIE0_DEV_ID = 11, - QCOM_SCM_PCIE1_DEV_ID = 12, - QCOM_SCM_GFX_DEV_ID = 18, - QCOM_SCM_UFS_DEV_ID = 19, - QCOM_SCM_ICE_DEV_ID = 20, -}; - -enum qcom_scm_ice_cipher { - QCOM_SCM_ICE_CIPHER_AES_128_XTS = 0, - QCOM_SCM_ICE_CIPHER_AES_128_CBC = 1, - QCOM_SCM_ICE_CIPHER_AES_256_XTS = 3, - QCOM_SCM_ICE_CIPHER_AES_256_CBC = 4, -}; - -#define QCOM_SCM_PERM_READ 0x4 -#define QCOM_SCM_PERM_WRITE 0x2 -#define QCOM_SCM_PERM_EXEC 0x1 -#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) -#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) - -extern bool qcom_scm_is_available(void); - -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); - -struct qcom_scm_pas_metadata { - void *ptr; - dma_addr_t phys; - ssize_t size; -}; - -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); -void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); - -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); - -extern bool qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - unsigned int *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); - -extern bool qcom_scm_ocmem_lock_available(void); -extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); - -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); - -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); - -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); - -#endif -- cgit v1.2.3 From c7d4e6ab3165693342c21f6faf80d983137fee0c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 1 Nov 2022 15:27:43 +0100 Subject: net/mlx5e: Propagate an internal event in case uplink netdev changes Whenever uplink netdev is set/cleared, propagate newly introduced event to inform notifier blocks netdev was added/removed. Move the set() helper to core.c from header, introduce clear() and netdev_added_event_replay() helpers. The last one is going to be called from rdma driver, so export it. Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/driver.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 29d4b201c7b2..f2b271169daf 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -362,6 +362,7 @@ enum mlx5_event { enum mlx5_driver_event { MLX5_DRIVER_EVENT_TYPE_TRAP = 0, + MLX5_DRIVER_EVENT_UPLINK_NETDEV, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f..cc48aa308269 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -674,6 +675,7 @@ struct mlx5e_resources { } hw_objs; struct devlink_port dl_port; struct net_device *uplink_netdev; + struct mutex uplink_netdev_lock; }; enum mlx5_sw_icm_type { @@ -1011,6 +1013,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); +void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); + int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); -- cgit v1.2.3 From e0766ea4c8f8f94a605e291aa3672a703dc1d2e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:12 +0100 Subject: driver core: bus: constantify the bus_find_* functions All of the bus find and iterator functions do not modify the struct bus_type passed to them, so mark them as constant to enforce this rule. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index e3094db1e9fa..f0c8bf91b07a 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -149,9 +149,9 @@ int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); /* iterator helpers for buses */ -int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, +int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); -struct device *bus_find_device(struct bus_type *bus, struct device *start, +struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, int (*match)(struct device *dev, const void *data)); /** @@ -161,7 +161,7 @@ struct device *bus_find_device(struct bus_type *bus, struct device *start, * @start: Device to begin with * @name: name of the device to match */ -static inline struct device *bus_find_device_by_name(struct bus_type *bus, +static inline struct device *bus_find_device_by_name(const struct bus_type *bus, struct device *start, const char *name) { @@ -175,7 +175,7 @@ static inline struct device *bus_find_device_by_name(struct bus_type *bus, * @np: of_node of the device to match. */ static inline struct device * -bus_find_device_by_of_node(struct bus_type *bus, const struct device_node *np) +bus_find_device_by_of_node(const struct bus_type *bus, const struct device_node *np) { return bus_find_device(bus, NULL, np, device_match_of_node); } @@ -187,7 +187,7 @@ bus_find_device_by_of_node(struct bus_type *bus, const struct device_node *np) * @fwnode: fwnode of the device to match. */ static inline struct device * -bus_find_device_by_fwnode(struct bus_type *bus, const struct fwnode_handle *fwnode) +bus_find_device_by_fwnode(const struct bus_type *bus, const struct fwnode_handle *fwnode) { return bus_find_device(bus, NULL, fwnode, device_match_fwnode); } @@ -198,7 +198,7 @@ bus_find_device_by_fwnode(struct bus_type *bus, const struct fwnode_handle *fwno * @bus: bus type * @devt: device type of the device to match. */ -static inline struct device *bus_find_device_by_devt(struct bus_type *bus, +static inline struct device *bus_find_device_by_devt(const struct bus_type *bus, dev_t devt) { return bus_find_device(bus, NULL, &devt, device_match_devt); @@ -211,7 +211,7 @@ static inline struct device *bus_find_device_by_devt(struct bus_type *bus, * @cur: device to begin the search with. */ static inline struct device * -bus_find_next_device(struct bus_type *bus,struct device *cur) +bus_find_next_device(const struct bus_type *bus,struct device *cur) { return bus_find_device(bus, cur, NULL, device_match_any); } @@ -226,19 +226,19 @@ struct acpi_device; * @adev: ACPI COMPANION device to match. */ static inline struct device * -bus_find_device_by_acpi_dev(struct bus_type *bus, const struct acpi_device *adev) +bus_find_device_by_acpi_dev(const struct bus_type *bus, const struct acpi_device *adev) { return bus_find_device(bus, NULL, adev, device_match_acpi_dev); } #else static inline struct device * -bus_find_device_by_acpi_dev(struct bus_type *bus, const void *adev) +bus_find_device_by_acpi_dev(const struct bus_type *bus, const void *adev) { return NULL; } #endif -int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, +int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); void bus_sort_breadthfirst(struct bus_type *bus, int (*compare)(const struct device *a, -- cgit v1.2.3 From 0396f2863f7af3c588033d270f7d979d11cd4708 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:13 +0100 Subject: driver core: bus: convert bus_create/remove_file to be constant bus_create_file() and bus_remove_file() can be made to take a constant bus pointer, as it should not be modifying anything in the bus structure. Make this change and move the functions to use the internal subsys_get/put() logic as well, to prevent the use of the back-pointer in struct bus_type. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index f0c8bf91b07a..f6537f5fc535 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -135,9 +135,8 @@ struct bus_attribute { #define BUS_ATTR_WO(_name) \ struct bus_attribute bus_attr_##_name = __ATTR_WO(_name) -extern int __must_check bus_create_file(struct bus_type *, - struct bus_attribute *); -extern void bus_remove_file(struct bus_type *, struct bus_attribute *); +int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr); +void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr); /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); -- cgit v1.2.3 From d2bf38c088e0d5467a0e8a2055d6f95dff5c2125 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:26 +0100 Subject: driver core: remove private pointer from struct bus_type Now that the driver code has been refactored to not rely on the pointer from a struct bus_type to the private structure it can be safely removed from the structure entirely. This will allow most bus_type structures to now be marked as const. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-18-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index f6537f5fc535..22bf63469275 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -66,8 +66,6 @@ struct fwnode_handle; * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU * driver implementations to a bus and allow the driver to do * bus-specific setup - * @p: The private data of the driver core, only the driver core can - * touch this. * @lock_key: Lock class key for use by the lock validator * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. @@ -111,8 +109,6 @@ struct bus_type { const struct iommu_ops *iommu_ops; - struct subsys_private *p; - bool need_parent_lock; }; -- cgit v1.2.3 From bc8b7931012f0511f016e2f048d1f4222db8e08f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:27 +0100 Subject: driver core: bus: constify bus_register/unregister_notifier() The bus_register_notifier() and bus_unregister_notifier() functions should be taking a const * to bus_type, not just a * so fix that up. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-19-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 22bf63469275..c0a034ff59b7 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -246,9 +246,9 @@ void bus_sort_breadthfirst(struct bus_type *bus, */ struct notifier_block; -extern int bus_register_notifier(struct bus_type *bus, +extern int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb); -extern int bus_unregister_notifier(struct bus_type *bus, +extern int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb); /** -- cgit v1.2.3 From f91482be9b480dfce2616f5ba3fa548b8ed41efb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:28 +0100 Subject: driver core: bus: constify bus_get_kset() The bus_get_kset() function should be taking a const * to bus_type, not just a * so fix that up. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-20-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index c0a034ff59b7..425d79d6cf69 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -281,6 +281,6 @@ enum bus_notifier_event { BUS_NOTIFY_DRIVER_NOT_BOUND, }; -extern struct kset *bus_get_kset(struct bus_type *bus); +extern struct kset *bus_get_kset(const struct bus_type *bus); #endif -- cgit v1.2.3 From ad8685d0f61a6fc1dc2e5874f4924ff5028c5954 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:30 +0100 Subject: driver core: bus: constify bus_unregister() The bus_unregister() function can now take a const * to bus_type, not just a * so fix that up. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-22-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 425d79d6cf69..31be18608f83 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -114,7 +114,7 @@ struct bus_type { extern int __must_check bus_register(struct bus_type *bus); -extern void bus_unregister(struct bus_type *bus); +extern void bus_unregister(const struct bus_type *bus); extern int __must_check bus_rescan_devices(struct bus_type *bus); -- cgit v1.2.3 From 491581f40e4c52c4b36c83e8c75b2210ed7c358a Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Fri, 3 Feb 2023 13:01:32 -0800 Subject: soc: qcom: geni-se: Move qcom-geni-se.h to linux/soc/qcom/geni-se.h Move include/linux/qcom-geni-se.h to include/linux/soc/qcom/geni-se.h. This removes 1 of a few remaining Qualcomm-specific headers into a more approciate subdirectory under include/. Signed-off-by: Elliot Berman Acked-by: Bjorn Andersson Acked-by: Wolfram Sang # for I2C Reviewed-by: Guru Das Srinagesh Link: https://lore.kernel.org/r/20230203210133.3552796-1-quic_eberman@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/qcom-geni-se.h | 482 --------------------------------------- include/linux/soc/qcom/geni-se.h | 482 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 482 insertions(+), 482 deletions(-) delete mode 100644 include/linux/qcom-geni-se.h create mode 100644 include/linux/soc/qcom/geni-se.h (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h deleted file mode 100644 index 400213daa461..000000000000 --- a/include/linux/qcom-geni-se.h +++ /dev/null @@ -1,482 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. - */ - -#ifndef _LINUX_QCOM_GENI_SE -#define _LINUX_QCOM_GENI_SE - -#include - -/** - * enum geni_se_xfer_mode: Transfer modes supported by Serial Engines - * - * @GENI_SE_INVALID: Invalid mode - * @GENI_SE_FIFO: FIFO mode. Data is transferred with SE FIFO - * by programmed IO method - * @GENI_SE_DMA: Serial Engine DMA mode. Data is transferred - * with SE by DMAengine internal to SE - * @GENI_GPI_DMA: GPI DMA mode. Data is transferred using a DMAengine - * configured by a firmware residing on a GSI engine. This DMA name is - * interchangeably used as GSI or GPI which seem to imply the same DMAengine - */ - -enum geni_se_xfer_mode { - GENI_SE_INVALID, - GENI_SE_FIFO, - GENI_SE_DMA, - GENI_GPI_DMA, -}; - -/* Protocols supported by GENI Serial Engines */ -enum geni_se_protocol_type { - GENI_SE_NONE, - GENI_SE_SPI, - GENI_SE_UART, - GENI_SE_I2C, - GENI_SE_I3C, -}; - -struct geni_wrapper; -struct clk; - -enum geni_icc_path_index { - GENI_TO_CORE, - CPU_TO_GENI, - GENI_TO_DDR -}; - -struct geni_icc_path { - struct icc_path *path; - unsigned int avg_bw; -}; - -/** - * struct geni_se - GENI Serial Engine - * @base: Base Address of the Serial Engine's register block - * @dev: Pointer to the Serial Engine device - * @wrapper: Pointer to the parent QUP Wrapper core - * @clk: Handle to the core serial engine clock - * @num_clk_levels: Number of valid clock levels in clk_perf_tbl - * @clk_perf_tbl: Table of clock frequency input to serial engine clock - * @icc_paths: Array of ICC paths for SE - */ -struct geni_se { - void __iomem *base; - struct device *dev; - struct geni_wrapper *wrapper; - struct clk *clk; - unsigned int num_clk_levels; - unsigned long *clk_perf_tbl; - struct geni_icc_path icc_paths[3]; -}; - -/* Common SE registers */ -#define GENI_FORCE_DEFAULT_REG 0x20 -#define SE_GENI_STATUS 0x40 -#define GENI_SER_M_CLK_CFG 0x48 -#define GENI_SER_S_CLK_CFG 0x4c -#define GENI_IF_DISABLE_RO 0x64 -#define GENI_FW_REVISION_RO 0x68 -#define SE_GENI_CLK_SEL 0x7c -#define SE_GENI_DMA_MODE_EN 0x258 -#define SE_GENI_M_CMD0 0x600 -#define SE_GENI_M_CMD_CTRL_REG 0x604 -#define SE_GENI_M_IRQ_STATUS 0x610 -#define SE_GENI_M_IRQ_EN 0x614 -#define SE_GENI_M_IRQ_CLEAR 0x618 -#define SE_GENI_S_CMD0 0x630 -#define SE_GENI_S_CMD_CTRL_REG 0x634 -#define SE_GENI_S_IRQ_STATUS 0x640 -#define SE_GENI_S_IRQ_EN 0x644 -#define SE_GENI_S_IRQ_CLEAR 0x648 -#define SE_GENI_TX_FIFOn 0x700 -#define SE_GENI_RX_FIFOn 0x780 -#define SE_GENI_TX_FIFO_STATUS 0x800 -#define SE_GENI_RX_FIFO_STATUS 0x804 -#define SE_GENI_TX_WATERMARK_REG 0x80c -#define SE_GENI_RX_WATERMARK_REG 0x810 -#define SE_GENI_RX_RFR_WATERMARK_REG 0x814 -#define SE_GENI_IOS 0x908 -#define SE_DMA_TX_IRQ_STAT 0xc40 -#define SE_DMA_TX_IRQ_CLR 0xc44 -#define SE_DMA_TX_FSM_RST 0xc58 -#define SE_DMA_RX_IRQ_STAT 0xd40 -#define SE_DMA_RX_IRQ_CLR 0xd44 -#define SE_DMA_RX_LEN_IN 0xd54 -#define SE_DMA_RX_FSM_RST 0xd58 -#define SE_HW_PARAM_0 0xe24 -#define SE_HW_PARAM_1 0xe28 - -/* GENI_FORCE_DEFAULT_REG fields */ -#define FORCE_DEFAULT BIT(0) - -/* GENI_STATUS fields */ -#define M_GENI_CMD_ACTIVE BIT(0) -#define S_GENI_CMD_ACTIVE BIT(12) - -/* GENI_SER_M_CLK_CFG/GENI_SER_S_CLK_CFG */ -#define SER_CLK_EN BIT(0) -#define CLK_DIV_MSK GENMASK(15, 4) -#define CLK_DIV_SHFT 4 - -/* GENI_IF_DISABLE_RO fields */ -#define FIFO_IF_DISABLE (BIT(0)) - -/* GENI_FW_REVISION_RO fields */ -#define FW_REV_PROTOCOL_MSK GENMASK(15, 8) -#define FW_REV_PROTOCOL_SHFT 8 - -/* GENI_CLK_SEL fields */ -#define CLK_SEL_MSK GENMASK(2, 0) - -/* SE_GENI_DMA_MODE_EN */ -#define GENI_DMA_MODE_EN BIT(0) - -/* GENI_M_CMD0 fields */ -#define M_OPCODE_MSK GENMASK(31, 27) -#define M_OPCODE_SHFT 27 -#define M_PARAMS_MSK GENMASK(26, 0) - -/* GENI_M_CMD_CTRL_REG */ -#define M_GENI_CMD_CANCEL BIT(2) -#define M_GENI_CMD_ABORT BIT(1) -#define M_GENI_DISABLE BIT(0) - -/* GENI_S_CMD0 fields */ -#define S_OPCODE_MSK GENMASK(31, 27) -#define S_OPCODE_SHFT 27 -#define S_PARAMS_MSK GENMASK(26, 0) - -/* GENI_S_CMD_CTRL_REG */ -#define S_GENI_CMD_CANCEL BIT(2) -#define S_GENI_CMD_ABORT BIT(1) -#define S_GENI_DISABLE BIT(0) - -/* GENI_M_IRQ_EN fields */ -#define M_CMD_DONE_EN BIT(0) -#define M_CMD_OVERRUN_EN BIT(1) -#define M_ILLEGAL_CMD_EN BIT(2) -#define M_CMD_FAILURE_EN BIT(3) -#define M_CMD_CANCEL_EN BIT(4) -#define M_CMD_ABORT_EN BIT(5) -#define M_TIMESTAMP_EN BIT(6) -#define M_RX_IRQ_EN BIT(7) -#define M_GP_SYNC_IRQ_0_EN BIT(8) -#define M_GP_IRQ_0_EN BIT(9) -#define M_GP_IRQ_1_EN BIT(10) -#define M_GP_IRQ_2_EN BIT(11) -#define M_GP_IRQ_3_EN BIT(12) -#define M_GP_IRQ_4_EN BIT(13) -#define M_GP_IRQ_5_EN BIT(14) -#define M_IO_DATA_DEASSERT_EN BIT(22) -#define M_IO_DATA_ASSERT_EN BIT(23) -#define M_RX_FIFO_RD_ERR_EN BIT(24) -#define M_RX_FIFO_WR_ERR_EN BIT(25) -#define M_RX_FIFO_WATERMARK_EN BIT(26) -#define M_RX_FIFO_LAST_EN BIT(27) -#define M_TX_FIFO_RD_ERR_EN BIT(28) -#define M_TX_FIFO_WR_ERR_EN BIT(29) -#define M_TX_FIFO_WATERMARK_EN BIT(30) -#define M_SEC_IRQ_EN BIT(31) -#define M_COMMON_GENI_M_IRQ_EN (GENMASK(6, 1) | \ - M_IO_DATA_DEASSERT_EN | \ - M_IO_DATA_ASSERT_EN | M_RX_FIFO_RD_ERR_EN | \ - M_RX_FIFO_WR_ERR_EN | M_TX_FIFO_RD_ERR_EN | \ - M_TX_FIFO_WR_ERR_EN) - -/* GENI_S_IRQ_EN fields */ -#define S_CMD_DONE_EN BIT(0) -#define S_CMD_OVERRUN_EN BIT(1) -#define S_ILLEGAL_CMD_EN BIT(2) -#define S_CMD_FAILURE_EN BIT(3) -#define S_CMD_CANCEL_EN BIT(4) -#define S_CMD_ABORT_EN BIT(5) -#define S_GP_SYNC_IRQ_0_EN BIT(8) -#define S_GP_IRQ_0_EN BIT(9) -#define S_GP_IRQ_1_EN BIT(10) -#define S_GP_IRQ_2_EN BIT(11) -#define S_GP_IRQ_3_EN BIT(12) -#define S_GP_IRQ_4_EN BIT(13) -#define S_GP_IRQ_5_EN BIT(14) -#define S_IO_DATA_DEASSERT_EN BIT(22) -#define S_IO_DATA_ASSERT_EN BIT(23) -#define S_RX_FIFO_RD_ERR_EN BIT(24) -#define S_RX_FIFO_WR_ERR_EN BIT(25) -#define S_RX_FIFO_WATERMARK_EN BIT(26) -#define S_RX_FIFO_LAST_EN BIT(27) -#define S_COMMON_GENI_S_IRQ_EN (GENMASK(5, 1) | GENMASK(13, 9) | \ - S_RX_FIFO_RD_ERR_EN | S_RX_FIFO_WR_ERR_EN) - -/* GENI_/TX/RX/RX_RFR/_WATERMARK_REG fields */ -#define WATERMARK_MSK GENMASK(5, 0) - -/* GENI_TX_FIFO_STATUS fields */ -#define TX_FIFO_WC GENMASK(27, 0) - -/* GENI_RX_FIFO_STATUS fields */ -#define RX_LAST BIT(31) -#define RX_LAST_BYTE_VALID_MSK GENMASK(30, 28) -#define RX_LAST_BYTE_VALID_SHFT 28 -#define RX_FIFO_WC_MSK GENMASK(24, 0) - -/* SE_GENI_IOS fields */ -#define IO2_DATA_IN BIT(1) -#define RX_DATA_IN BIT(0) - -/* SE_DMA_TX_IRQ_STAT Register fields */ -#define TX_DMA_DONE BIT(0) -#define TX_EOT BIT(1) -#define TX_SBE BIT(2) -#define TX_RESET_DONE BIT(3) - -/* SE_DMA_RX_IRQ_STAT Register fields */ -#define RX_DMA_DONE BIT(0) -#define RX_EOT BIT(1) -#define RX_SBE BIT(2) -#define RX_RESET_DONE BIT(3) -#define RX_FLUSH_DONE BIT(4) -#define RX_DMA_PARITY_ERR BIT(5) -#define RX_DMA_BREAK GENMASK(8, 7) -#define RX_GENI_GP_IRQ GENMASK(10, 5) -#define RX_GENI_CANCEL_IRQ BIT(11) -#define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) - -/* SE_HW_PARAM_0 fields */ -#define TX_FIFO_WIDTH_MSK GENMASK(29, 24) -#define TX_FIFO_WIDTH_SHFT 24 -#define TX_FIFO_DEPTH_MSK GENMASK(21, 16) -#define TX_FIFO_DEPTH_SHFT 16 - -/* SE_HW_PARAM_1 fields */ -#define RX_FIFO_WIDTH_MSK GENMASK(29, 24) -#define RX_FIFO_WIDTH_SHFT 24 -#define RX_FIFO_DEPTH_MSK GENMASK(21, 16) -#define RX_FIFO_DEPTH_SHFT 16 - -#define HW_VER_MAJOR_MASK GENMASK(31, 28) -#define HW_VER_MAJOR_SHFT 28 -#define HW_VER_MINOR_MASK GENMASK(27, 16) -#define HW_VER_MINOR_SHFT 16 -#define HW_VER_STEP_MASK GENMASK(15, 0) - -#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT) -#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) -#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) - -/* QUP SE VERSION value for major number 2 and minor number 5 */ -#define QUP_SE_VERSION_2_5 0x20050000 - -/* - * Define bandwidth thresholds that cause the underlying Core 2X interconnect - * clock to run at the named frequency. These baseline values are recommended - * by the hardware team, and are not dynamically scaled with GENI bandwidth - * beyond basic on/off. - */ -#define CORE_2X_19_2_MHZ 960 -#define CORE_2X_50_MHZ 2500 -#define CORE_2X_100_MHZ 5000 -#define CORE_2X_150_MHZ 7500 -#define CORE_2X_200_MHZ 10000 -#define CORE_2X_236_MHZ 16383 - -#define GENI_DEFAULT_BW Bps_to_icc(1000) - -#if IS_ENABLED(CONFIG_QCOM_GENI_SE) - -u32 geni_se_get_qup_hw_version(struct geni_se *se); - -/** - * geni_se_read_proto() - Read the protocol configured for a serial engine - * @se: Pointer to the concerned serial engine. - * - * Return: Protocol value as configured in the serial engine. - */ -static inline u32 geni_se_read_proto(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + GENI_FW_REVISION_RO); - - return (val & FW_REV_PROTOCOL_MSK) >> FW_REV_PROTOCOL_SHFT; -} - -/** - * geni_se_setup_m_cmd() - Setup the primary sequencer - * @se: Pointer to the concerned serial engine. - * @cmd: Command/Operation to setup in the primary sequencer. - * @params: Parameter for the sequencer command. - * - * This function is used to configure the primary sequencer with the - * command and its associated parameters. - */ -static inline void geni_se_setup_m_cmd(struct geni_se *se, u32 cmd, u32 params) -{ - u32 m_cmd; - - m_cmd = (cmd << M_OPCODE_SHFT) | (params & M_PARAMS_MSK); - writel(m_cmd, se->base + SE_GENI_M_CMD0); -} - -/** - * geni_se_setup_s_cmd() - Setup the secondary sequencer - * @se: Pointer to the concerned serial engine. - * @cmd: Command/Operation to setup in the secondary sequencer. - * @params: Parameter for the sequencer command. - * - * This function is used to configure the secondary sequencer with the - * command and its associated parameters. - */ -static inline void geni_se_setup_s_cmd(struct geni_se *se, u32 cmd, u32 params) -{ - u32 s_cmd; - - s_cmd = readl_relaxed(se->base + SE_GENI_S_CMD0); - s_cmd &= ~(S_OPCODE_MSK | S_PARAMS_MSK); - s_cmd |= (cmd << S_OPCODE_SHFT); - s_cmd |= (params & S_PARAMS_MSK); - writel(s_cmd, se->base + SE_GENI_S_CMD0); -} - -/** - * geni_se_cancel_m_cmd() - Cancel the command configured in the primary - * sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to cancel the currently configured command in the - * primary sequencer. - */ -static inline void geni_se_cancel_m_cmd(struct geni_se *se) -{ - writel_relaxed(M_GENI_CMD_CANCEL, se->base + SE_GENI_M_CMD_CTRL_REG); -} - -/** - * geni_se_cancel_s_cmd() - Cancel the command configured in the secondary - * sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to cancel the currently configured command in the - * secondary sequencer. - */ -static inline void geni_se_cancel_s_cmd(struct geni_se *se) -{ - writel_relaxed(S_GENI_CMD_CANCEL, se->base + SE_GENI_S_CMD_CTRL_REG); -} - -/** - * geni_se_abort_m_cmd() - Abort the command configured in the primary sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to force abort the currently configured command in the - * primary sequencer. - */ -static inline void geni_se_abort_m_cmd(struct geni_se *se) -{ - writel_relaxed(M_GENI_CMD_ABORT, se->base + SE_GENI_M_CMD_CTRL_REG); -} - -/** - * geni_se_abort_s_cmd() - Abort the command configured in the secondary - * sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to force abort the currently configured command in the - * secondary sequencer. - */ -static inline void geni_se_abort_s_cmd(struct geni_se *se) -{ - writel_relaxed(S_GENI_CMD_ABORT, se->base + SE_GENI_S_CMD_CTRL_REG); -} - -/** - * geni_se_get_tx_fifo_depth() - Get the TX fifo depth of the serial engine - * @se: Pointer to the concerned serial engine. - * - * This function is used to get the depth i.e. number of elements in the - * TX fifo of the serial engine. - * - * Return: TX fifo depth in units of FIFO words. - */ -static inline u32 geni_se_get_tx_fifo_depth(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + SE_HW_PARAM_0); - - return (val & TX_FIFO_DEPTH_MSK) >> TX_FIFO_DEPTH_SHFT; -} - -/** - * geni_se_get_tx_fifo_width() - Get the TX fifo width of the serial engine - * @se: Pointer to the concerned serial engine. - * - * This function is used to get the width i.e. word size per element in the - * TX fifo of the serial engine. - * - * Return: TX fifo width in bits - */ -static inline u32 geni_se_get_tx_fifo_width(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + SE_HW_PARAM_0); - - return (val & TX_FIFO_WIDTH_MSK) >> TX_FIFO_WIDTH_SHFT; -} - -/** - * geni_se_get_rx_fifo_depth() - Get the RX fifo depth of the serial engine - * @se: Pointer to the concerned serial engine. - * - * This function is used to get the depth i.e. number of elements in the - * RX fifo of the serial engine. - * - * Return: RX fifo depth in units of FIFO words - */ -static inline u32 geni_se_get_rx_fifo_depth(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + SE_HW_PARAM_1); - - return (val & RX_FIFO_DEPTH_MSK) >> RX_FIFO_DEPTH_SHFT; -} - -void geni_se_init(struct geni_se *se, u32 rx_wm, u32 rx_rfr); - -void geni_se_select_mode(struct geni_se *se, enum geni_se_xfer_mode mode); - -void geni_se_config_packing(struct geni_se *se, int bpw, int pack_words, - bool msb_to_lsb, bool tx_cfg, bool rx_cfg); - -int geni_se_resources_off(struct geni_se *se); - -int geni_se_resources_on(struct geni_se *se); - -int geni_se_clk_tbl_get(struct geni_se *se, unsigned long **tbl); - -int geni_se_clk_freq_match(struct geni_se *se, unsigned long req_freq, - unsigned int *index, unsigned long *res_freq, - bool exact); - -int geni_se_tx_dma_prep(struct geni_se *se, void *buf, size_t len, - dma_addr_t *iova); - -int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, - dma_addr_t *iova); - -void geni_se_tx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); - -void geni_se_rx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); - -int geni_icc_get(struct geni_se *se, const char *icc_ddr); - -int geni_icc_set_bw(struct geni_se *se); -void geni_icc_set_tag(struct geni_se *se, u32 tag); - -int geni_icc_enable(struct geni_se *se); - -int geni_icc_disable(struct geni_se *se); -#endif -#endif diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h new file mode 100644 index 000000000000..400213daa461 --- /dev/null +++ b/include/linux/soc/qcom/geni-se.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. + */ + +#ifndef _LINUX_QCOM_GENI_SE +#define _LINUX_QCOM_GENI_SE + +#include + +/** + * enum geni_se_xfer_mode: Transfer modes supported by Serial Engines + * + * @GENI_SE_INVALID: Invalid mode + * @GENI_SE_FIFO: FIFO mode. Data is transferred with SE FIFO + * by programmed IO method + * @GENI_SE_DMA: Serial Engine DMA mode. Data is transferred + * with SE by DMAengine internal to SE + * @GENI_GPI_DMA: GPI DMA mode. Data is transferred using a DMAengine + * configured by a firmware residing on a GSI engine. This DMA name is + * interchangeably used as GSI or GPI which seem to imply the same DMAengine + */ + +enum geni_se_xfer_mode { + GENI_SE_INVALID, + GENI_SE_FIFO, + GENI_SE_DMA, + GENI_GPI_DMA, +}; + +/* Protocols supported by GENI Serial Engines */ +enum geni_se_protocol_type { + GENI_SE_NONE, + GENI_SE_SPI, + GENI_SE_UART, + GENI_SE_I2C, + GENI_SE_I3C, +}; + +struct geni_wrapper; +struct clk; + +enum geni_icc_path_index { + GENI_TO_CORE, + CPU_TO_GENI, + GENI_TO_DDR +}; + +struct geni_icc_path { + struct icc_path *path; + unsigned int avg_bw; +}; + +/** + * struct geni_se - GENI Serial Engine + * @base: Base Address of the Serial Engine's register block + * @dev: Pointer to the Serial Engine device + * @wrapper: Pointer to the parent QUP Wrapper core + * @clk: Handle to the core serial engine clock + * @num_clk_levels: Number of valid clock levels in clk_perf_tbl + * @clk_perf_tbl: Table of clock frequency input to serial engine clock + * @icc_paths: Array of ICC paths for SE + */ +struct geni_se { + void __iomem *base; + struct device *dev; + struct geni_wrapper *wrapper; + struct clk *clk; + unsigned int num_clk_levels; + unsigned long *clk_perf_tbl; + struct geni_icc_path icc_paths[3]; +}; + +/* Common SE registers */ +#define GENI_FORCE_DEFAULT_REG 0x20 +#define SE_GENI_STATUS 0x40 +#define GENI_SER_M_CLK_CFG 0x48 +#define GENI_SER_S_CLK_CFG 0x4c +#define GENI_IF_DISABLE_RO 0x64 +#define GENI_FW_REVISION_RO 0x68 +#define SE_GENI_CLK_SEL 0x7c +#define SE_GENI_DMA_MODE_EN 0x258 +#define SE_GENI_M_CMD0 0x600 +#define SE_GENI_M_CMD_CTRL_REG 0x604 +#define SE_GENI_M_IRQ_STATUS 0x610 +#define SE_GENI_M_IRQ_EN 0x614 +#define SE_GENI_M_IRQ_CLEAR 0x618 +#define SE_GENI_S_CMD0 0x630 +#define SE_GENI_S_CMD_CTRL_REG 0x634 +#define SE_GENI_S_IRQ_STATUS 0x640 +#define SE_GENI_S_IRQ_EN 0x644 +#define SE_GENI_S_IRQ_CLEAR 0x648 +#define SE_GENI_TX_FIFOn 0x700 +#define SE_GENI_RX_FIFOn 0x780 +#define SE_GENI_TX_FIFO_STATUS 0x800 +#define SE_GENI_RX_FIFO_STATUS 0x804 +#define SE_GENI_TX_WATERMARK_REG 0x80c +#define SE_GENI_RX_WATERMARK_REG 0x810 +#define SE_GENI_RX_RFR_WATERMARK_REG 0x814 +#define SE_GENI_IOS 0x908 +#define SE_DMA_TX_IRQ_STAT 0xc40 +#define SE_DMA_TX_IRQ_CLR 0xc44 +#define SE_DMA_TX_FSM_RST 0xc58 +#define SE_DMA_RX_IRQ_STAT 0xd40 +#define SE_DMA_RX_IRQ_CLR 0xd44 +#define SE_DMA_RX_LEN_IN 0xd54 +#define SE_DMA_RX_FSM_RST 0xd58 +#define SE_HW_PARAM_0 0xe24 +#define SE_HW_PARAM_1 0xe28 + +/* GENI_FORCE_DEFAULT_REG fields */ +#define FORCE_DEFAULT BIT(0) + +/* GENI_STATUS fields */ +#define M_GENI_CMD_ACTIVE BIT(0) +#define S_GENI_CMD_ACTIVE BIT(12) + +/* GENI_SER_M_CLK_CFG/GENI_SER_S_CLK_CFG */ +#define SER_CLK_EN BIT(0) +#define CLK_DIV_MSK GENMASK(15, 4) +#define CLK_DIV_SHFT 4 + +/* GENI_IF_DISABLE_RO fields */ +#define FIFO_IF_DISABLE (BIT(0)) + +/* GENI_FW_REVISION_RO fields */ +#define FW_REV_PROTOCOL_MSK GENMASK(15, 8) +#define FW_REV_PROTOCOL_SHFT 8 + +/* GENI_CLK_SEL fields */ +#define CLK_SEL_MSK GENMASK(2, 0) + +/* SE_GENI_DMA_MODE_EN */ +#define GENI_DMA_MODE_EN BIT(0) + +/* GENI_M_CMD0 fields */ +#define M_OPCODE_MSK GENMASK(31, 27) +#define M_OPCODE_SHFT 27 +#define M_PARAMS_MSK GENMASK(26, 0) + +/* GENI_M_CMD_CTRL_REG */ +#define M_GENI_CMD_CANCEL BIT(2) +#define M_GENI_CMD_ABORT BIT(1) +#define M_GENI_DISABLE BIT(0) + +/* GENI_S_CMD0 fields */ +#define S_OPCODE_MSK GENMASK(31, 27) +#define S_OPCODE_SHFT 27 +#define S_PARAMS_MSK GENMASK(26, 0) + +/* GENI_S_CMD_CTRL_REG */ +#define S_GENI_CMD_CANCEL BIT(2) +#define S_GENI_CMD_ABORT BIT(1) +#define S_GENI_DISABLE BIT(0) + +/* GENI_M_IRQ_EN fields */ +#define M_CMD_DONE_EN BIT(0) +#define M_CMD_OVERRUN_EN BIT(1) +#define M_ILLEGAL_CMD_EN BIT(2) +#define M_CMD_FAILURE_EN BIT(3) +#define M_CMD_CANCEL_EN BIT(4) +#define M_CMD_ABORT_EN BIT(5) +#define M_TIMESTAMP_EN BIT(6) +#define M_RX_IRQ_EN BIT(7) +#define M_GP_SYNC_IRQ_0_EN BIT(8) +#define M_GP_IRQ_0_EN BIT(9) +#define M_GP_IRQ_1_EN BIT(10) +#define M_GP_IRQ_2_EN BIT(11) +#define M_GP_IRQ_3_EN BIT(12) +#define M_GP_IRQ_4_EN BIT(13) +#define M_GP_IRQ_5_EN BIT(14) +#define M_IO_DATA_DEASSERT_EN BIT(22) +#define M_IO_DATA_ASSERT_EN BIT(23) +#define M_RX_FIFO_RD_ERR_EN BIT(24) +#define M_RX_FIFO_WR_ERR_EN BIT(25) +#define M_RX_FIFO_WATERMARK_EN BIT(26) +#define M_RX_FIFO_LAST_EN BIT(27) +#define M_TX_FIFO_RD_ERR_EN BIT(28) +#define M_TX_FIFO_WR_ERR_EN BIT(29) +#define M_TX_FIFO_WATERMARK_EN BIT(30) +#define M_SEC_IRQ_EN BIT(31) +#define M_COMMON_GENI_M_IRQ_EN (GENMASK(6, 1) | \ + M_IO_DATA_DEASSERT_EN | \ + M_IO_DATA_ASSERT_EN | M_RX_FIFO_RD_ERR_EN | \ + M_RX_FIFO_WR_ERR_EN | M_TX_FIFO_RD_ERR_EN | \ + M_TX_FIFO_WR_ERR_EN) + +/* GENI_S_IRQ_EN fields */ +#define S_CMD_DONE_EN BIT(0) +#define S_CMD_OVERRUN_EN BIT(1) +#define S_ILLEGAL_CMD_EN BIT(2) +#define S_CMD_FAILURE_EN BIT(3) +#define S_CMD_CANCEL_EN BIT(4) +#define S_CMD_ABORT_EN BIT(5) +#define S_GP_SYNC_IRQ_0_EN BIT(8) +#define S_GP_IRQ_0_EN BIT(9) +#define S_GP_IRQ_1_EN BIT(10) +#define S_GP_IRQ_2_EN BIT(11) +#define S_GP_IRQ_3_EN BIT(12) +#define S_GP_IRQ_4_EN BIT(13) +#define S_GP_IRQ_5_EN BIT(14) +#define S_IO_DATA_DEASSERT_EN BIT(22) +#define S_IO_DATA_ASSERT_EN BIT(23) +#define S_RX_FIFO_RD_ERR_EN BIT(24) +#define S_RX_FIFO_WR_ERR_EN BIT(25) +#define S_RX_FIFO_WATERMARK_EN BIT(26) +#define S_RX_FIFO_LAST_EN BIT(27) +#define S_COMMON_GENI_S_IRQ_EN (GENMASK(5, 1) | GENMASK(13, 9) | \ + S_RX_FIFO_RD_ERR_EN | S_RX_FIFO_WR_ERR_EN) + +/* GENI_/TX/RX/RX_RFR/_WATERMARK_REG fields */ +#define WATERMARK_MSK GENMASK(5, 0) + +/* GENI_TX_FIFO_STATUS fields */ +#define TX_FIFO_WC GENMASK(27, 0) + +/* GENI_RX_FIFO_STATUS fields */ +#define RX_LAST BIT(31) +#define RX_LAST_BYTE_VALID_MSK GENMASK(30, 28) +#define RX_LAST_BYTE_VALID_SHFT 28 +#define RX_FIFO_WC_MSK GENMASK(24, 0) + +/* SE_GENI_IOS fields */ +#define IO2_DATA_IN BIT(1) +#define RX_DATA_IN BIT(0) + +/* SE_DMA_TX_IRQ_STAT Register fields */ +#define TX_DMA_DONE BIT(0) +#define TX_EOT BIT(1) +#define TX_SBE BIT(2) +#define TX_RESET_DONE BIT(3) + +/* SE_DMA_RX_IRQ_STAT Register fields */ +#define RX_DMA_DONE BIT(0) +#define RX_EOT BIT(1) +#define RX_SBE BIT(2) +#define RX_RESET_DONE BIT(3) +#define RX_FLUSH_DONE BIT(4) +#define RX_DMA_PARITY_ERR BIT(5) +#define RX_DMA_BREAK GENMASK(8, 7) +#define RX_GENI_GP_IRQ GENMASK(10, 5) +#define RX_GENI_CANCEL_IRQ BIT(11) +#define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) + +/* SE_HW_PARAM_0 fields */ +#define TX_FIFO_WIDTH_MSK GENMASK(29, 24) +#define TX_FIFO_WIDTH_SHFT 24 +#define TX_FIFO_DEPTH_MSK GENMASK(21, 16) +#define TX_FIFO_DEPTH_SHFT 16 + +/* SE_HW_PARAM_1 fields */ +#define RX_FIFO_WIDTH_MSK GENMASK(29, 24) +#define RX_FIFO_WIDTH_SHFT 24 +#define RX_FIFO_DEPTH_MSK GENMASK(21, 16) +#define RX_FIFO_DEPTH_SHFT 16 + +#define HW_VER_MAJOR_MASK GENMASK(31, 28) +#define HW_VER_MAJOR_SHFT 28 +#define HW_VER_MINOR_MASK GENMASK(27, 16) +#define HW_VER_MINOR_SHFT 16 +#define HW_VER_STEP_MASK GENMASK(15, 0) + +#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT) +#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) +#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) + +/* QUP SE VERSION value for major number 2 and minor number 5 */ +#define QUP_SE_VERSION_2_5 0x20050000 + +/* + * Define bandwidth thresholds that cause the underlying Core 2X interconnect + * clock to run at the named frequency. These baseline values are recommended + * by the hardware team, and are not dynamically scaled with GENI bandwidth + * beyond basic on/off. + */ +#define CORE_2X_19_2_MHZ 960 +#define CORE_2X_50_MHZ 2500 +#define CORE_2X_100_MHZ 5000 +#define CORE_2X_150_MHZ 7500 +#define CORE_2X_200_MHZ 10000 +#define CORE_2X_236_MHZ 16383 + +#define GENI_DEFAULT_BW Bps_to_icc(1000) + +#if IS_ENABLED(CONFIG_QCOM_GENI_SE) + +u32 geni_se_get_qup_hw_version(struct geni_se *se); + +/** + * geni_se_read_proto() - Read the protocol configured for a serial engine + * @se: Pointer to the concerned serial engine. + * + * Return: Protocol value as configured in the serial engine. + */ +static inline u32 geni_se_read_proto(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + GENI_FW_REVISION_RO); + + return (val & FW_REV_PROTOCOL_MSK) >> FW_REV_PROTOCOL_SHFT; +} + +/** + * geni_se_setup_m_cmd() - Setup the primary sequencer + * @se: Pointer to the concerned serial engine. + * @cmd: Command/Operation to setup in the primary sequencer. + * @params: Parameter for the sequencer command. + * + * This function is used to configure the primary sequencer with the + * command and its associated parameters. + */ +static inline void geni_se_setup_m_cmd(struct geni_se *se, u32 cmd, u32 params) +{ + u32 m_cmd; + + m_cmd = (cmd << M_OPCODE_SHFT) | (params & M_PARAMS_MSK); + writel(m_cmd, se->base + SE_GENI_M_CMD0); +} + +/** + * geni_se_setup_s_cmd() - Setup the secondary sequencer + * @se: Pointer to the concerned serial engine. + * @cmd: Command/Operation to setup in the secondary sequencer. + * @params: Parameter for the sequencer command. + * + * This function is used to configure the secondary sequencer with the + * command and its associated parameters. + */ +static inline void geni_se_setup_s_cmd(struct geni_se *se, u32 cmd, u32 params) +{ + u32 s_cmd; + + s_cmd = readl_relaxed(se->base + SE_GENI_S_CMD0); + s_cmd &= ~(S_OPCODE_MSK | S_PARAMS_MSK); + s_cmd |= (cmd << S_OPCODE_SHFT); + s_cmd |= (params & S_PARAMS_MSK); + writel(s_cmd, se->base + SE_GENI_S_CMD0); +} + +/** + * geni_se_cancel_m_cmd() - Cancel the command configured in the primary + * sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to cancel the currently configured command in the + * primary sequencer. + */ +static inline void geni_se_cancel_m_cmd(struct geni_se *se) +{ + writel_relaxed(M_GENI_CMD_CANCEL, se->base + SE_GENI_M_CMD_CTRL_REG); +} + +/** + * geni_se_cancel_s_cmd() - Cancel the command configured in the secondary + * sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to cancel the currently configured command in the + * secondary sequencer. + */ +static inline void geni_se_cancel_s_cmd(struct geni_se *se) +{ + writel_relaxed(S_GENI_CMD_CANCEL, se->base + SE_GENI_S_CMD_CTRL_REG); +} + +/** + * geni_se_abort_m_cmd() - Abort the command configured in the primary sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to force abort the currently configured command in the + * primary sequencer. + */ +static inline void geni_se_abort_m_cmd(struct geni_se *se) +{ + writel_relaxed(M_GENI_CMD_ABORT, se->base + SE_GENI_M_CMD_CTRL_REG); +} + +/** + * geni_se_abort_s_cmd() - Abort the command configured in the secondary + * sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to force abort the currently configured command in the + * secondary sequencer. + */ +static inline void geni_se_abort_s_cmd(struct geni_se *se) +{ + writel_relaxed(S_GENI_CMD_ABORT, se->base + SE_GENI_S_CMD_CTRL_REG); +} + +/** + * geni_se_get_tx_fifo_depth() - Get the TX fifo depth of the serial engine + * @se: Pointer to the concerned serial engine. + * + * This function is used to get the depth i.e. number of elements in the + * TX fifo of the serial engine. + * + * Return: TX fifo depth in units of FIFO words. + */ +static inline u32 geni_se_get_tx_fifo_depth(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + SE_HW_PARAM_0); + + return (val & TX_FIFO_DEPTH_MSK) >> TX_FIFO_DEPTH_SHFT; +} + +/** + * geni_se_get_tx_fifo_width() - Get the TX fifo width of the serial engine + * @se: Pointer to the concerned serial engine. + * + * This function is used to get the width i.e. word size per element in the + * TX fifo of the serial engine. + * + * Return: TX fifo width in bits + */ +static inline u32 geni_se_get_tx_fifo_width(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + SE_HW_PARAM_0); + + return (val & TX_FIFO_WIDTH_MSK) >> TX_FIFO_WIDTH_SHFT; +} + +/** + * geni_se_get_rx_fifo_depth() - Get the RX fifo depth of the serial engine + * @se: Pointer to the concerned serial engine. + * + * This function is used to get the depth i.e. number of elements in the + * RX fifo of the serial engine. + * + * Return: RX fifo depth in units of FIFO words + */ +static inline u32 geni_se_get_rx_fifo_depth(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + SE_HW_PARAM_1); + + return (val & RX_FIFO_DEPTH_MSK) >> RX_FIFO_DEPTH_SHFT; +} + +void geni_se_init(struct geni_se *se, u32 rx_wm, u32 rx_rfr); + +void geni_se_select_mode(struct geni_se *se, enum geni_se_xfer_mode mode); + +void geni_se_config_packing(struct geni_se *se, int bpw, int pack_words, + bool msb_to_lsb, bool tx_cfg, bool rx_cfg); + +int geni_se_resources_off(struct geni_se *se); + +int geni_se_resources_on(struct geni_se *se); + +int geni_se_clk_tbl_get(struct geni_se *se, unsigned long **tbl); + +int geni_se_clk_freq_match(struct geni_se *se, unsigned long req_freq, + unsigned int *index, unsigned long *res_freq, + bool exact); + +int geni_se_tx_dma_prep(struct geni_se *se, void *buf, size_t len, + dma_addr_t *iova); + +int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, + dma_addr_t *iova); + +void geni_se_tx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); + +void geni_se_rx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); + +int geni_icc_get(struct geni_se *se, const char *icc_ddr); + +int geni_icc_set_bw(struct geni_se *se); +void geni_icc_set_tag(struct geni_se *se, u32 tag); + +int geni_icc_enable(struct geni_se *se); + +int geni_icc_disable(struct geni_se *se); +#endif +#endif -- cgit v1.2.3 From 03a86105556e23650e4470c09f91cf7c360d5e28 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 7 Feb 2023 15:03:30 -0800 Subject: HID: retain initial quirks set up when creating HID devices In certain circumstances, such as when creating I2C-connected HID devices, we want to pass and retain some quirks (axis inversion, etc). The source of such quirks may be device tree, or DMI data, or something else not readily available to the HID core itself and therefore cannot be reconstructed easily. To allow this, introduce "initial_quirks" field in hid_device structure and use it when determining the final set of quirks. This fixes the problem with i2c-hid setting up device-tree sourced quirks too late and losing them on device rebind, and also allows to sever the tie between hid-code and i2c-hid when applying DMI-based quirks. Fixes: b60d3c803d76 ("HID: i2c-hid-of: Expose the touchscreen-inverted properties") Fixes: a2f416bf062a ("HID: multitouch: Add quirks for flipped axes") Reviewed-by: Guenter Roeck Tested-by: Allen Ballway Signed-off-by: Dmitry Torokhov Reviewed-by: Alistair Francis Link: https://lore.kernel.org/r/Y+LYwu3Zs13hdVDy@google.com Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 8677ae38599e..48563dc09e17 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -619,6 +619,7 @@ struct hid_device { /* device report descriptor */ unsigned long status; /* see STAT flags above */ unsigned claimed; /* Claimed by hidinput, hiddev? */ unsigned quirks; /* Various quirks the device can pull on us */ + unsigned initial_quirks; /* Initial set of quirks supplied when creating device */ bool io_started; /* If IO has started */ struct list_head inputs; /* The list of inputs */ -- cgit v1.2.3 From 2b48f52f2bff8e8926165983f3a3d7b89b33de08 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 3 Feb 2023 16:50:26 -0500 Subject: vfio: fix deadlock between group lock and kvm lock After 51cdc8bc120e, we have another deadlock scenario between the kvm->lock and the vfio group_lock with two different codepaths acquiring the locks in different order. Specifically in vfio_open_device, vfio holds the vfio group_lock when issuing device->ops->open_device but some drivers (like vfio-ap) need to acquire kvm->lock during their open_device routine; Meanwhile, kvm_vfio_release will acquire the kvm->lock first before calling vfio_file_set_kvm which will acquire the vfio group_lock. To resolve this, let's remove the need for the vfio group_lock from the kvm_vfio_release codepath. This is done by introducing a new spinlock to protect modifications to the vfio group kvm pointer, and acquiring a kvm ref from within vfio while holding this spinlock, with the reference held until the last close for the device in question. Fixes: 51cdc8bc120e ("kvm/vfio: Fix potential deadlock on vfio group_lock") Reported-by: Anthony Krowiak Suggested-by: Jason Gunthorpe Signed-off-by: Matthew Rosato Tested-by: Tony Krowiak Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20230203215027.151988-2-mjrosato@linux.ibm.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 35be78e9ae57..87ff862ff555 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -46,7 +46,6 @@ struct vfio_device { struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; - /* Driver must reference the kvm during open_device or never touch it */ struct kvm *kvm; /* Members below here are private, not for driver use */ @@ -58,6 +57,7 @@ struct vfio_device { struct list_head group_next; struct list_head iommu_entry; struct iommufd_access *iommufd_access; + void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; struct iommufd_ctx *iommufd_ictx; -- cgit v1.2.3 From fae9068022186b832534ea8b325d5242586c4303 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 9 Feb 2023 00:12:09 -0800 Subject: vfio: Update the kdoc for vfio_device_ops this is missed when adding bind_iommufd/unbind_iommufd and attach_ioas. Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230209081210.141372-2-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 87ff862ff555..93134b023968 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -70,6 +70,10 @@ struct vfio_device { * * @init: initialize private fields in device structure * @release: Reclaim private fields in device structure + * @bind_iommufd: Called when binding the device to an iommufd + * @unbind_iommufd: Opposite of bind_iommufd + * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the + * bound iommufd. Undo in unbind_iommufd. * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor -- cgit v1.2.3 From dd329e1e21b54c73f58a440b6164d04d8a7fc542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Feb 2023 20:59:09 +0100 Subject: cpufreq: Make cpufreq_unregister_driver() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All but a few drivers ignore the return value of cpufreq_unregister_driver(). Those few that don't only call it after cpufreq_register_driver() succeeded, in which case the call doesn't fail. Make the function return no value and add a WARN_ON for the case that the function is called in an invalid situation (i.e. without a previous successful call to cpufreq_register_driver()). Signed-off-by: Uwe Kleine-König Acked-by: Florian Fainelli # brcmstb-avs-cpufreq.c Acked-by: Viresh Kumar Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6a94a6eaad27..65623233ab2f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -448,7 +448,7 @@ struct cpufreq_driver { #define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING BIT(6) int cpufreq_register_driver(struct cpufreq_driver *driver_data); -int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); +void cpufreq_unregister_driver(struct cpufreq_driver *driver_data); bool cpufreq_driver_test_flags(u16 flags); const char *cpufreq_get_current_driver(void); -- cgit v1.2.3 From 19409796578c879a41e88ddbdbce50c19457658d Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Wed, 23 Nov 2022 10:55:26 +0100 Subject: include/linux/bcd.h: provide bcd_is_valid() helper bcd2bin(0x0A) happily returns 10, despite this being an invalid BCD value. RTC drivers converting possibly corrupted BCD timestamps might want to validate their input before calling bcd2bin(). Provide a macro to do so. Unlike bcd2bin and bin2bcd, out-of-line versions are not implemented. Should the macro experience enough use, this can be retrofitted. Signed-off-by: Ahmad Fatoum Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20221123095527.2771434-2-s.hauer@pengutronix.de Signed-off-by: Alexandre Belloni --- include/linux/bcd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcd.h b/include/linux/bcd.h index 118bea36d7d4..abbc8149178e 100644 --- a/include/linux/bcd.h +++ b/include/linux/bcd.h @@ -14,8 +14,12 @@ const_bin2bcd(x) : \ _bin2bcd(x)) +#define bcd_is_valid(x) \ + const_bcd_is_valid(x) + #define const_bcd2bin(x) (((x) & 0x0f) + ((x) >> 4) * 10) #define const_bin2bcd(x) ((((x) / 10) << 4) + (x) % 10) +#define const_bcd_is_valid(x) (((x) & 0x0f) < 10 && ((x) >> 4) < 10) unsigned _bcd2bin(unsigned char val) __attribute_const__; unsigned char _bin2bcd(unsigned val) __attribute_const__; -- cgit v1.2.3 From badc28d4924bfed73efc93f716a0c3aa3afbdf6f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 2 Feb 2023 18:56:12 +0800 Subject: mm: shrinkers: fix deadlock in shrinker debugfs The debugfs_remove_recursive() is invoked by unregister_shrinker(), which is holding the write lock of shrinker_rwsem. It will waits for the handler of debugfs file complete. The handler also needs to hold the read lock of shrinker_rwsem to do something. So it may cause the following deadlock: CPU0 CPU1 debugfs_file_get() shrinker_debugfs_count_show()/shrinker_debugfs_scan_write() unregister_shrinker() --> down_write(&shrinker_rwsem); debugfs_remove_recursive() // wait for (A) --> wait_for_completion(); // wait for (B) --> down_read_killable(&shrinker_rwsem) debugfs_file_put() -- (A) up_write() -- (B) The down_read_killable() can be killed, so that the above deadlock can be recovered. But it still requires an extra kill action, otherwise it will block all subsequent shrinker-related operations, so it's better to fix it. [akpm@linux-foundation.org: fix CONFIG_SHRINKER_DEBUG=n stub] Link: https://lkml.kernel.org/r/20230202105612.64641-1-zhengqi.arch@bytedance.com Fixes: 5035ebc644ae ("mm: shrinkers: introduce debugfs interface for memory shrinkers") Signed-off-by: Qi Zheng Reviewed-by: Roman Gushchin Cc: Kent Overstreet Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 71310efe2fab..7bde8e1c228a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -107,7 +107,7 @@ extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern void shrinker_debugfs_remove(struct shrinker *shrinker); +extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker); extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ @@ -115,8 +115,9 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } -static inline void shrinker_debugfs_remove(struct shrinker *shrinker) +static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker) { + return NULL; } static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) -- cgit v1.2.3 From c643e6ebedb435bcf863001f5e69a578f2658055 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 3 Feb 2023 16:28:40 -0500 Subject: mm: fix memcpy_from_file_folio() integer underflow If we have a HIGHMEM system with a large folio, 'offset' may be larger than PAGE_SIZE, and so min_t will cap at 'len' instead of the intended end-of-page. That can overflow into the next page which is likely to be unmapped and fault, but could theoretically copy the wrong data. Link: https://lkml.kernel.org/r/Y919vmSrtAgsf6K3@casper.infradead.org Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") Signed-off-by: Matthew Wilcox (Oracle) Cc: "Fabio M. De Francesco" Cc: Ira Weiny Signed-off-by: Andrew Morton --- include/linux/highmem.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 348701dae77f..b06254e76d99 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -431,9 +431,10 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) + if (folio_test_highmem(folio)) { + offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); - else + } else len = min(len, folio_size(folio) - offset); memcpy(to, from, len); -- cgit v1.2.3 From e7f43ca99fc8bff2333547bb08dae20a35a23450 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:02 -0500 Subject: maple_tree: add mas_init() function Patch series "VMA tree type safety and remove __vma_adjust()", v4. This patchset does two things: 1. Clean up, including removal of __vma_adjust() and 2. Extends the VMA iterator API to provide type safety to the VMA operations using the maple tree, as requested by Linus [1]. It also addresses another issue of usability brought up by Linus about needing to modify the maple state within the loops. The maple state has been replaced by the VMA iterator and the iterator is now modified within the MM code so the caller should not need to worry about doing the work themselves when tree modifications occur. This brought up a potential inconsistency of the iterator state and what the user expects, so the inconsistency is addressed to keep the VMA iterator safe for use after the looping over a VMA range. This is addressed in patch 3 ("maple_tree: Reduce user error potential") and 4 ("test_maple_tree: Test modifications while iterating"). While cleaning up the state, the duplicate locking code in mm/mmap.c introduced by the maple tree has been address by abstracting it to two functions: vma_prepare() and vma_complete(). These abstractions allowed for a much simpler __vma_adjust(), which eventually leads to the removal of the __vma_adjust() function by placing the logic into the vma_merge() function itself. 1. https://lore.kernel.org/linux-mm/CAHk-=wg9WQXBGkNdKD2bqocnN73rDswuWsavBB7T-tekykEn_A@mail.gmail.com/ This patch (of 49): Add a function that will zero out the maple state struct and set some basic defaults. Link: https://lkml.kernel.org/r/20230120162650.984577-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230120162650.984577-2-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index a7bf58fd7cc6..1fadb5f5978b 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -432,6 +432,7 @@ struct ma_wr_state { .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ + .mas_flags = 0, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ @@ -470,6 +471,16 @@ void *mas_next(struct ma_state *mas, unsigned long max); int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); +static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, + unsigned long addr) +{ + memset(mas, 0, sizeof(struct ma_state)); + mas->tree = tree; + mas->index = mas->last = addr; + mas->max = ULONG_MAX; + mas->node = MAS_START; +} + /* Checks if a mas has not found anything */ static inline bool mas_is_none(struct ma_state *mas) { -- cgit v1.2.3 From b62b633e048bbddef90b2e55d2e33823187b425f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:08 -0500 Subject: mm: expand vma iterator interface Add wrappers for the maple tree to the vma iterator. This will provide type safety at compile time. Link: https://lkml.kernel.org/r/20230120162650.984577-8-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/mm_types.h | 4 +--- 2 files changed, 43 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c9db257f09b3..b977a90d9829 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -670,16 +670,16 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { - return mas_find(&vmi->mas, max); + return mas_find(&vmi->mas, max - 1); } static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) { /* - * Uses vma_find() to get the first VMA when the iterator starts. + * Uses mas_find() to get the first VMA when the iterator starts. * Calling mas_next() could skip the first entry. */ - return vma_find(vmi, ULONG_MAX); + return mas_find(&vmi->mas, ULONG_MAX); } static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) @@ -692,12 +692,50 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) return vmi->mas.index; } +static inline unsigned long vma_iter_end(struct vma_iterator *vmi) +{ + return vmi->mas.last + 1; +} +static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, + unsigned long count) +{ + return mas_expected_entries(&vmi->mas, count); +} + +/* Free any unused preallocations */ +static inline void vma_iter_free(struct vma_iterator *vmi) +{ + mas_destroy(&vmi->mas); +} + +static inline int vma_iter_bulk_store(struct vma_iterator *vmi, + struct vm_area_struct *vma) +{ + vmi->mas.index = vma->vm_start; + vmi->mas.last = vma->vm_end - 1; + mas_store(&vmi->mas, vma); + if (unlikely(mas_is_err(&vmi->mas))) + return -ENOMEM; + + return 0; +} + +static inline void vma_iter_invalidate(struct vma_iterator *vmi) +{ + mas_pause(&vmi->mas); +} + +static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) +{ + mas_set(&vmi->mas, addr); +} + #define for_each_vma(__vmi, __vma) \ while (((__vma) = vma_next(&(__vmi))) != NULL) /* The MM code likes to work with exclusive end addresses */ #define for_each_vma_range(__vmi, __vma, __end) \ - while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL) + while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) #ifdef CONFIG_SHMEM /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 452920467223..5ca11c6c46e8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -849,9 +849,7 @@ struct vma_iterator { static inline void vma_iter_init(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long addr) { - vmi->mas.tree = &mm->mm_mt; - vmi->mas.index = addr; - vmi->mas.node = MAS_START; + mas_init(&vmi->mas, &mm->mm_mt, addr); } struct mmu_gather; -- cgit v1.2.3 From 183654ce26a5d5bd7bc11bcb02e8086f02f66d7d Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:13 -0500 Subject: mmap: change do_mas_munmap and do_mas_aligned_munmap() to use vma iterator Start passing the vma iterator through the mm code. This will allow for reuse of the state and cleaner invalidation if necessary. Link: https://lkml.kernel.org/r/20230120162650.984577-13-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b977a90d9829..152a1362b800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2905,7 +2905,7 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf); -extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm, +extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, -- cgit v1.2.3 From f2ebfe43ba6c845e70b6acbabd6c69ab74b3c52e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:15 -0500 Subject: mm: add temporary vma iterator versions of vma_merge(), split_vma(), and __split_vma() These wrappers are short-lived in this patch set so that each user can be converted on its own. In the end, these functions are renamed in one commit. Link: https://lkml.kernel.org/r/20230120162650.984577-15-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 152a1362b800..956025940053 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2843,11 +2843,20 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); +extern struct vm_area_struct *vmi_vma_merge(struct vma_iterator *vmi, + struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, + unsigned long end, unsigned long vm_flags, struct anon_vma *, + struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, + struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, - unsigned long addr, int new_below); + unsigned long addr, int new_below); +extern int vmi__split_vma(struct vma_iterator *vmi, struct mm_struct *, + struct vm_area_struct *, unsigned long addr, int new_below); extern int split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); +extern int vmi_split_vma(struct vma_iterator *vmi, struct mm_struct *, + struct vm_area_struct *, unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, -- cgit v1.2.3 From 27b267011296e35dd5c983bf6c53b7230c78f383 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 26 Jan 2023 16:20:49 -0500 Subject: ipc/shm: introduce new do_vma_munmap() to munmap The shm already has the vma iterator in position for a write. do_vmi_munmap() searches for the correct position and aligns the write, so it is not the right function to use in this case. The shm VMA tree modification is similar to the brk munmap situation, the vma iterator is in position and the VMA is already known. This patch generalizes the brk munmap function do_brk_munmap() to be used for any other callers with the vma iterator already in position to munmap a VMA. Link: https://lkml.kernel.org/r/20230126212049.980501-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reported-by: Sven Schnelle Link: https://lore.kernel.org/linux-mm/yt9dh6wec21a.fsf@linux.ibm.com/ Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 956025940053..5b5f26d6588a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2922,6 +2922,9 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t, extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU +extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *uf, bool downgrade); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) -- cgit v1.2.3 From 2286a6914c776ec34cd97e4573b1466d055cb9de Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:18 -0500 Subject: mm: change mprotect_fixup to vma iterator Use the vma iterator so that the iterator can be invalidated or updated to avoid each caller doing so. Link: https://lkml.kernel.org/r/20230120162650.984577-18-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5b5f26d6588a..144ddfd65992 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2197,9 +2197,9 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, extern long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags); -extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, - struct vm_area_struct **pprev, unsigned long start, - unsigned long end, unsigned long newflags); +extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, + struct vm_area_struct *vma, struct vm_area_struct **pprev, + unsigned long start, unsigned long end, unsigned long newflags); /* * doesn't attempt to fault and will return short. -- cgit v1.2.3 From 9760ebffbf5507320e0de41f5b80089bdef996a0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:30 -0500 Subject: mm: switch vma_merge(), split_vma(), and __split_vma to vma iterator Drop the vmi_* functions and transition all users to use the vma iterator directly. Link: https://lkml.kernel.org/r/20230120162650.984577-30-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 144ddfd65992..f3b49feb5c35 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2839,24 +2839,16 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, { return __vma_adjust(vma, start, end, pgoff, insert, NULL); } -extern struct vm_area_struct *vma_merge(struct mm_struct *, - struct vm_area_struct *prev, unsigned long addr, unsigned long end, - unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); -extern struct vm_area_struct *vmi_vma_merge(struct vma_iterator *vmi, +extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int __split_vma(struct mm_struct *, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int vmi__split_vma(struct vma_iterator *vmi, struct mm_struct *, - struct vm_area_struct *, unsigned long addr, int new_below); -extern int split_vma(struct mm_struct *, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int vmi_split_vma(struct vma_iterator *vmi, struct mm_struct *, - struct vm_area_struct *, unsigned long addr, int new_below); +extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, + unsigned long addr, int new_below); +extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *, + unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, -- cgit v1.2.3 From fbcc3104b8437cc1babf04421e8bb8181561343e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:32 -0500 Subject: mmap: convert __vma_adjust() to use vma iterator Use the vma iterator internally for __vma_adjust(). Avoid using the maple tree interface directly for type safety. Link: https://lkml.kernel.org/r/20230120162650.984577-32-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f3b49feb5c35..2f62d687e9bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2856,9 +2856,6 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); -void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas); -void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas); - static inline int check_data_rlimit(unsigned long rlim, unsigned long new, unsigned long start, -- cgit v1.2.3 From 9e56044625a1f472edc278105f41a60726991d89 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:33 -0500 Subject: mm: pass through vma iterator to __vma_adjust() Pass the vma iterator through to __vma_adjust() so the state can be updated. Link: https://lkml.kernel.org/r/20230120162650.984577-33-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f62d687e9bd..9c15f401f295 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2831,13 +2831,15 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start, +extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, struct vm_area_struct *expand); static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) { - return __vma_adjust(vma, start, end, pgoff, insert, NULL); + VMA_ITERATOR(vmi, vma->vm_mm, start); + + return __vma_adjust(&vmi, vma, start, end, pgoff, insert, NULL); } extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, -- cgit v1.2.3 From b373037fa9bb374f26bbabc0779fe990d02d33b7 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:37 -0500 Subject: mm: add vma iterator to vma_adjust() arguments Change the vma_adjust() function definition to accept the vma iterator and pass it through to __vma_adjust(). Update fs/exec to use the new vma_adjust() function parameters. Update mm/mremap to use the new vma_adjust() function parameters. Revert the __split_vma() calls back from __vma_adjust() to vma_adjust() and pass through the vma iterator. Link: https://lkml.kernel.org/r/20230120162650.984577-37-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c15f401f295..2e95287a9f74 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2834,12 +2834,11 @@ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admi extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, struct vm_area_struct *expand); -static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) +static inline int vma_adjust(struct vma_iterator *vmi, + struct vm_area_struct *vma, unsigned long start, unsigned long end, + pgoff_t pgoff, struct vm_area_struct *insert) { - VMA_ITERATOR(vmi, vma->vm_mm, start); - - return __vma_adjust(&vmi, vma, start, end, pgoff, insert, NULL); + return __vma_adjust(vmi, vma, start, end, pgoff, insert, NULL); } extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, -- cgit v1.2.3 From b2b3b886738fec5e89ca9ebc720eba1a8f615753 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:44 -0500 Subject: mm: don't use __vma_adjust() in __split_vma() Use the abstracted locking and maple tree operations. Since __split_vma() is the only user of the __vma_adjust() function to use the insert argument, drop that argument. Remove the NULL passed through from fs/exec's shift_arg_pages() and mremap() at the same time. Link: https://lkml.kernel.org/r/20230120162650.984577-44-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e95287a9f74..3845de5d2581 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2832,13 +2832,12 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, - struct vm_area_struct *expand); + unsigned long end, pgoff_t pgoff, struct vm_area_struct *expand); static inline int vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, - pgoff_t pgoff, struct vm_area_struct *insert) + pgoff_t pgoff) { - return __vma_adjust(vmi, vma, start, end, pgoff, insert, NULL); + return __vma_adjust(vmi, vma, start, end, pgoff, NULL); } extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, -- cgit v1.2.3 From 7c9813e886bb52495ff5b97d4b0f1320d36d869b Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:45 -0500 Subject: mm/mremap: convert vma_adjust() to vma_expand() Stop using vma_adjust() in preparation for removing the function. Export vma_expand() to use instead. Link: https://lkml.kernel.org/r/20230120162650.984577-45-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3845de5d2581..245fb30858c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2839,6 +2839,9 @@ static inline int vma_adjust(struct vma_iterator *vmi, { return __vma_adjust(vmi, vma, start, end, pgoff, NULL); } +extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, pgoff_t pgoff, + struct vm_area_struct *next); extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, -- cgit v1.2.3 From cf51e86dfbe39b7cae3a9de650d035af22dd5fb4 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:46 -0500 Subject: mm/mmap: don't use __vma_adjust() in shift_arg_pages() Introduce shrink_vma() which uses the vma_prepare() and vma_complete() functions to reduce the vma coverage. Convert shift_arg_pages() to use expand_vma() and the new shrink_vma() function. Remove support from __vma_adjust() to reduce a vma size since shift_arg_pages() is the only user that shrinks a VMA in this way. Link: https://lkml.kernel.org/r/20230120162650.984577-46-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 245fb30858c9..dcc34533d2f6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2831,17 +2831,11 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *expand); -static inline int vma_adjust(struct vma_iterator *vmi, - struct vm_area_struct *vma, unsigned long start, unsigned long end, - pgoff_t pgoff) -{ - return __vma_adjust(vmi, vma, start, end, pgoff, NULL); -} extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *next); +extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, pgoff_t pgoff); extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, -- cgit v1.2.3 From bc292ab00f6c7a661a8a605c714e8a148f629ef6 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:47 -0800 Subject: mm: introduce vma->vm_flags wrapper functions vm_flags are among VMA attributes which affect decisions like VMA merging and splitting. Therefore all vm_flags modifications are performed after taking exclusive mmap_lock to prevent vm_flags updates racing with such operations. Introduce modifier functions for vm_flags to be used whenever flags are updated. This way we can better check and control correct locking behavior during these updates. Link: https://lkml.kernel.org/r/20230126193752.297968-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Davidlohr Bueso Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 10 +++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dcc34533d2f6..e2df5d122b67 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -627,6 +627,46 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) INIT_LIST_HEAD(&vma->anon_vma_chain); } +/* Use when VMA is not part of the VMA tree and needs no locking */ +static inline void vm_flags_init(struct vm_area_struct *vma, + vm_flags_t flags) +{ + ACCESS_PRIVATE(vma, __vm_flags) = flags; +} + +/* Use when VMA is part of the VMA tree and modifications need coordination */ +static inline void vm_flags_reset(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + vm_flags_init(vma, flags); +} + +static inline void vm_flags_set(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + ACCESS_PRIVATE(vma, __vm_flags) |= flags; +} + +static inline void vm_flags_clear(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; +} + +/* + * Use only when the order of set/clear operations is unimportant, otherwise + * use vm_flags_{set|clear} explicitly. + */ +static inline void vm_flags_mod(struct vm_area_struct *vma, + vm_flags_t set, vm_flags_t clear) +{ + mmap_assert_write_locked(vma->vm_mm); + vm_flags_init(vma, (vma->vm_flags | set) & ~clear); +} + static inline void vma_set_anonymous(struct vm_area_struct *vma) { vma->vm_ops = NULL; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5ca11c6c46e8..10a1e41f4e70 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -491,7 +491,15 @@ struct vm_area_struct { * See vmf_insert_mixed_prot() for discussion. */ pgprot_t vm_page_prot; - unsigned long vm_flags; /* Flags, see mm.h. */ + + /* + * Flags, see mm.h. + * To modify use vm_flags_{init|reset|set|clear|mod} functions. + */ + union { + const vm_flags_t vm_flags; + vm_flags_t __private __vm_flags; + }; /* * For areas with an address space and backing store, -- cgit v1.2.3 From e430a95a04efc557bc4ff9b3035c7c85aee5d63f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:48 -0800 Subject: mm: replace VM_LOCKED_CLEAR_MASK with VM_LOCKED_MASK To simplify the usage of VM_LOCKED_CLEAR_MASK in vm_flags_clear(), replace it with VM_LOCKED_MASK bitmask and convert all users. Link: https://lkml.kernel.org/r/20230126193752.297968-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Reviewed-by: Davidlohr Bueso Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e2df5d122b67..663726ca2240 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -421,8 +421,8 @@ extern unsigned int kobjsize(const void *objp); /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE -/* This mask is used to clear all the VMA flags used by mlock */ -#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) +/* This mask represents all the VMA flag bits used by mlock */ +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) /* Arch-specific flags to clear when updating VM flags on protection change */ #ifndef VM_ARCH_CLEAR -- cgit v1.2.3 From 1c71222e5f2393b5ea1a41795c67589eea7e3490 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:49 -0800 Subject: mm: replace vma->vm_flags direct modifications with modifier calls Replace direct modifications to vma->vm_flags with calls to modifier functions to be able to track flag changes and to keep vma locking correctness. [akpm@linux-foundation.org: fix drivers/misc/open-dice.c, per Hyeonggon Yoo] Link: https://lkml.kernel.org/r/20230126193752.297968-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Acked-by: Sebastian Reichel Reviewed-by: Liam R. Howlett Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 663726ca2240..ce6d9d765aae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3653,7 +3653,7 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) * VM_MAYWRITE as we still want them to be COW-writable. */ if (vma->vm_flags & VM_SHARED) - vma->vm_flags &= ~(VM_MAYWRITE); + vm_flags_clear(vma, VM_MAYWRITE); } return 0; -- cgit v1.2.3 From 68f48381d7fdd1cbb9d88c37a4dfbb98ac78226d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:51 -0800 Subject: mm: introduce __vm_flags_mod and use it in untrack_pfn There are scenarios when vm_flags can be modified without exclusive mmap_lock, such as: - after VMA was isolated and mmap_lock was downgraded or dropped - in exit_mmap when there are no other mm users and locking is unnecessary Introduce __vm_flags_mod to avoid assertions when the caller takes responsibility for the required locking. Pass a hint to untrack_pfn to conditionally use __vm_flags_mod for flags modification to avoid assertion. Link: https://lkml.kernel.org/r/20230126193752.297968-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mike Rapoport (IBM) Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Mel Gorman Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 ++++++++++++-- include/linux/pgtable.h | 5 +++-- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ce6d9d765aae..27b34f7730e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -656,6 +656,16 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; } +/* + * Use only if VMA is not part of the VMA tree or has no other users and + * therefore needs no locking. + */ +static inline void __vm_flags_mod(struct vm_area_struct *vma, + vm_flags_t set, vm_flags_t clear) +{ + vm_flags_init(vma, (vma->vm_flags | set) & ~clear); +} + /* * Use only when the order of set/clear operations is unimportant, otherwise * use vm_flags_{set|clear} explicitly. @@ -664,7 +674,7 @@ static inline void vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { mmap_assert_write_locked(vma->vm_mm); - vm_flags_init(vma, (vma->vm_flags | set) & ~clear); + __vm_flags_mod(vma, set, clear); } static inline void vma_set_anonymous(struct vm_area_struct *vma) @@ -2085,7 +2095,7 @@ static inline void zap_vma_pages(struct vm_area_struct *vma) } void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, - unsigned long end); + unsigned long end, bool mm_wr_locked); struct mmu_notifier_range; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5fd45454c073..c63cd44777ec 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1185,7 +1185,8 @@ static inline int track_pfn_copy(struct vm_area_struct *vma) * can be for the entire vma (in which case pfn, size are zero). */ static inline void untrack_pfn(struct vm_area_struct *vma, - unsigned long pfn, unsigned long size) + unsigned long pfn, unsigned long size, + bool mm_wr_locked) { } @@ -1203,7 +1204,7 @@ extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn); extern int track_pfn_copy(struct vm_area_struct *vma); extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size); + unsigned long size, bool mm_wr_locked); extern void untrack_pfn_moved(struct vm_area_struct *vma); #endif -- cgit v1.2.3 From 601c3c29dbeb049862faa00917f2daf094a71028 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 31 Jan 2023 16:01:16 -0800 Subject: mm: introduce vm_flags_reset_once to replace WRITE_ONCE vm_flags updates Provide vm_flags_reset_once() and replace the vm_flags updates which used WRITE_ONCE() to prevent compiler optimizations. Link: https://lkml.kernel.org/r/20230201000116.1333160-1-surenb@google.com Fixes: 0cce31a0aa0e ("mm: replace vma->vm_flags direct modifications with modifier calls") Signed-off-by: Suren Baghdasaryan Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 27b34f7730e7..0ed0cb2401f5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -642,6 +642,13 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_init(vma, flags); } +static inline void vm_flags_reset_once(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); +} + static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { -- cgit v1.2.3 From 3e629597b8477efbcc0ad14ee80558a080eebdc3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Feb 2023 16:25:19 +0000 Subject: filemap: add mapping_read_folio_gfp() This is like read_cache_page_gfp() except it returns the folio instead of the precise page. Link: https://lkml.kernel.org/r/20230206162520.4029022-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Charan Teja Kalla Cc: David Rientjes Cc: Hugh Dickins Cc: Mark Hemment Cc: Michal Hocko Cc: Pavankumar Kondeti Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9f1081683771..6a32ac170d3d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -756,6 +756,8 @@ static inline struct page *grab_cache_page(struct address_space *mapping, struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); +struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, + gfp_t flags); struct page *read_cache_page(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); extern struct page * read_cache_page_gfp(struct address_space *mapping, -- cgit v1.2.3 From f01b2b3ed8735dacd92f1da548708449525e286a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Feb 2023 16:25:20 +0000 Subject: shmem: add shmem_read_folio() and shmem_read_folio_gfp() These are the folio replacements for shmem_read_mapping_page() and shmem_read_mapping_page_gfp(). [akpm@linux-foundation.org: fix shmem_read_mapping_page_gfp(), per Matthew] Link: https://lkml.kernel.org/r/Y+QdJTuzxeBYejw2@casper.infradead.org Link: https://lkml.kernel.org/r/20230206162520.4029022-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Mark Hemment Cc: Charan Teja Kalla Cc: David Rientjes Cc: Hugh Dickins Cc: Michal Hocko Cc: Pavankumar Kondeti Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d09d54be4ffd..103d1000a5a2 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -109,6 +109,14 @@ enum sgp_type { int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp); +struct folio *shmem_read_folio_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp); + +static inline struct folio *shmem_read_folio(struct address_space *mapping, + pgoff_t index) +{ + return shmem_read_folio_gfp(mapping, index, mapping_gfp_mask(mapping)); +} static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) -- cgit v1.2.3 From 869176a096068056b338b5cc1b0af93106007f5d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 6 Feb 2023 16:40:15 +0800 Subject: mm/vmalloc.c: add flags to mark vm_map_ram area Through vmalloc API, a virtual kernel area is reserved for physical address mapping. And vmap_area is used to track them, while vm_struct is allocated to associate with the vmap_area to store more information and passed out. However, area reserved via vm_map_ram() is an exception. It doesn't have vm_struct to associate with vmap_area. And we can't recognize the vmap_area with '->vm == NULL' as a vm_map_ram() area because the normal freeing path will set va->vm = NULL before unmapping, please see function remove_vm_area(). Meanwhile, there are two kinds of handling for vm_map_ram area. One is the whole vmap_area being reserved and mapped at one time through vm_map_area() interface; the other is the whole vmap_area with VMAP_BLOCK_SIZE size being reserved, while mapped into split regions with smaller size via vb_alloc(). To mark the area reserved through vm_map_ram(), add flags field into struct vmap_area. Bit 0 indicates this is vm_map_ram area created through vm_map_ram() interface, while bit 1 marks out the type of vm_map_ram area which makes use of vmap_block to manage split regions via vb_alloc/free(). This is a preparation for later use. Link: https://lkml.kernel.org/r/20230206084020.174506-3-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Lorenzo Stoakes Reviewed-by: Uladzislau Rezki (Sony) Cc: Dan Carpenter Cc: Stephen Brennan Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 096d48aa3437..69250efa03d1 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -76,6 +76,7 @@ struct vmap_area { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ }; + unsigned long flags; /* mark type of vm_map_ram area */ }; /* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */ -- cgit v1.2.3 From 7427c30bea1449a885a1dd9baf991aaad26209ce Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:23 -0400 Subject: mm/gup: remove obsolete FOLL_LONGTERM comment These days FOLL_LONGTERM is not allowed at all on any get_user_pages*() functions, it must be only be used with pin_user_pages*(), plus it now has universal support for all the pin_user_pages*() functions. Link: https://lkml.kernel.org/r/2-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: David Howells Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10a1e41f4e70..4396c7bf06d1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1053,12 +1053,6 @@ typedef unsigned int __bitwise zap_flags_t; * specifically failed. Filesystem pages are still subject to bugs and use of * FOLL_LONGTERM should be avoided on those pages. * - * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. - * Currently only get_user_pages() and get_user_pages_fast() support this flag - * and calls to get_user_pages_[un]locked are specifically not allowed. This - * is due to an incompatibility with the FS DAX check and - * FAULT_FLAG_ALLOW_RETRY. - * * In the CMA case: long term pins in a CMA region would unnecessarily fragment * that region. And so, CMA attempts to migrate the page before pinning, when * FOLL_LONGTERM is specified. -- cgit v1.2.3 From 7ce154fe6917e7db94d63bc4d6c73b678ad1c581 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:25 -0400 Subject: mm/gup: move try_grab_page() to mm/internal.h This is part of the internal function of gup.c and is only non-static so that the parts of gup.c in the huge_memory.c and hugetlb.c can call it. Put it in internal.h beside the similarly purposed try_grab_folio() Link: https://lkml.kernel.org/r/4-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: David Howells Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ed0cb2401f5..afefc166b349 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1268,8 +1268,6 @@ static inline void get_page(struct page *page) folio_get(page_folio(page)); } -int __must_check try_grab_page(struct page *page, unsigned int flags); - static inline __must_check bool try_get_page(struct page *page) { page = compound_head(page); -- cgit v1.2.3 From f04740f54594f85935e29a5c8ff6722f427f3dac Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:29 -0400 Subject: mm/gup: add FOLL_UNLOCKABLE Setting FOLL_UNLOCKABLE allows GUP to lock/unlock the mmap lock on its own. It is a more explicit replacement for locked != NULL. This clears the way for passing in locked = 1, without intending that the lock can be unlocked. Set the flag in all cases where it is used, eg locked is present in the external interface or locked is used internally with locked = 0. Link: https://lkml.kernel.org/r/8-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Acked-by: Mike Rapoport (IBM) Reviewed-by: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: David Howells Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4396c7bf06d1..434b3ac8a351 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1104,5 +1104,6 @@ typedef unsigned int __bitwise zap_flags_t; #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ #define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ #define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ +#define FOLL_UNLOCKABLE 0x400000 /* allow unlocking the mmap lock (internal only) */ #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From edad1bb1fbf7e28b49bf76b2aa66bfcaba00f627 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:31 -0400 Subject: mm/gup: remove pin_user_pages_fast_only() Commit ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.") removed the only caller, remove this dead code too. Link: https://lkml.kernel.org/r/10-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Acked-by: Mike Rapoport (IBM) Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Howells Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index afefc166b349..a0d59645dcf9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2261,8 +2261,6 @@ extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); -int pin_user_pages_fast_only(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages); static inline bool get_user_page_fast_only(unsigned long addr, unsigned int gup_flags, struct page **pagep) -- cgit v1.2.3 From 63b605128655f2e3968d99e30b293c7e7eaa2fc2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:33 -0400 Subject: mm/gup: move gup_must_unshare() to mm/internal.h This function is only used in gup.c and closely related. It touches FOLL_PIN so it must be moved before the next patch. Link: https://lkml.kernel.org/r/12-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Howells Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm.h | 65 ------------------------------------------------------ 1 file changed, 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a0d59645dcf9..4a0695ef969a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3180,71 +3180,6 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) return 0; } -/* - * Indicates for which pages that are write-protected in the page table, - * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the - * GUP pin will remain consistent with the pages mapped into the page tables - * of the MM. - * - * Temporary unmapping of PageAnonExclusive() pages or clearing of - * PageAnonExclusive() has to protect against concurrent GUP: - * * Ordinary GUP: Using the PT lock - * * GUP-fast and fork(): mm->write_protect_seq - * * GUP-fast and KSM or temporary unmapping (swap, migration): see - * page_try_share_anon_rmap() - * - * Must be called with the (sub)page that's actually referenced via the - * page table entry, which might not necessarily be the head page for a - * PTE-mapped THP. - * - * If the vma is NULL, we're coming from the GUP-fast path and might have - * to fallback to the slow path just to lookup the vma. - */ -static inline bool gup_must_unshare(struct vm_area_struct *vma, - unsigned int flags, struct page *page) -{ - /* - * FOLL_WRITE is implicitly handled correctly as the page table entry - * has to be writable -- and if it references (part of) an anonymous - * folio, that part is required to be marked exclusive. - */ - if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) - return false; - /* - * Note: PageAnon(page) is stable until the page is actually getting - * freed. - */ - if (!PageAnon(page)) { - /* - * We only care about R/O long-term pining: R/O short-term - * pinning does not have the semantics to observe successive - * changes through the process page tables. - */ - if (!(flags & FOLL_LONGTERM)) - return false; - - /* We really need the vma ... */ - if (!vma) - return true; - - /* - * ... because we only care about writable private ("COW") - * mappings where we have to break COW early. - */ - return is_cow_mapping(vma->vm_flags); - } - - /* Paired with a memory barrier in page_try_share_anon_rmap(). */ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) - smp_rmb(); - - /* - * Note that PageKsm() pages cannot be exclusive, and consequently, - * cannot get pinned. - */ - return !PageAnonExclusive(page); -} - /* * Indicates whether GUP can follow a PROT_NONE mapped page, or whether * a (NUMA hinting) fault is required. -- cgit v1.2.3 From 2c2241081f7dec878331fdc3a3f2361e99556bca Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:34 -0400 Subject: mm/gup: move private gup FOLL_ flags to internal.h Move the flags that should not/are not used outside gup.c and related into mm/internal.h to discourage driver abuse. To make this more maintainable going forward compact the two FOLL ranges with new bit numbers from 0 to 11 and 16 to 21, using shifts so it is explicit. Switch to an enum so the whole thing is easier to read. Link: https://lkml.kernel.org/r/13-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Acked-by: David Hildenbrand Cc: David Howells Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: Alistair Popple Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 434b3ac8a351..56753d0f096d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1040,9 +1040,6 @@ typedef unsigned int __bitwise zap_flags_t; * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * other. Here is what they mean, and how to use them: * - * FOLL_LONGTERM indicates that the page will be held for an indefinite time - * period _often_ under userspace control. This is in contrast to - * iov_iter_get_pages(), whose usages are transient. * * FIXME: For pages which are part of a filesystem, mappings are subject to the * lifetime enforced by the filesystem and we need guarantees that longterm @@ -1086,24 +1083,40 @@ typedef unsigned int __bitwise zap_flags_t; * Please see Documentation/core-api/pin_user_pages.rst for more information. */ -#define FOLL_WRITE 0x01 /* check pte is writable */ -#define FOLL_TOUCH 0x02 /* mark page accessed */ -#define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ -#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ -#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO - * and return without waiting upon it */ -#define FOLL_NOFAULT 0x80 /* do not fault in pages */ -#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ -#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_ANON 0x8000 /* don't do file mappings */ -#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ -#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ -#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ -#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ -#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ -#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ -#define FOLL_UNLOCKABLE 0x400000 /* allow unlocking the mmap lock (internal only) */ +enum { + /* check pte is writable */ + FOLL_WRITE = 1 << 0, + /* do get_page on page */ + FOLL_GET = 1 << 1, + /* give error on hole if it would be zero */ + FOLL_DUMP = 1 << 2, + /* get_user_pages read/write w/o permission */ + FOLL_FORCE = 1 << 3, + /* + * if a disk transfer is needed, start the IO and return without waiting + * upon it + */ + FOLL_NOWAIT = 1 << 4, + /* do not fault in pages */ + FOLL_NOFAULT = 1 << 5, + /* check page is hwpoisoned */ + FOLL_HWPOISON = 1 << 6, + /* don't do file mappings */ + FOLL_ANON = 1 << 7, + /* + * FOLL_LONGTERM indicates that the page will be held for an indefinite + * time period _often_ under userspace control. This is in contrast to + * iov_iter_get_pages(), whose usages are transient. + */ + FOLL_LONGTERM = 1 << 8, + /* split huge pmd before returning */ + FOLL_SPLIT_PMD = 1 << 9, + /* allow returning PCI P2PDMA pages */ + FOLL_PCI_P2PDMA = 1 << 10, + /* allow interrupts from generic signals */ + FOLL_INTERRUPTIBLE = 1 << 11, + + /* See also internal only FOLL flags in mm/internal.h */ +}; #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 605cfa1b1090b5d9e227d8a8f7d08fdd04f07724 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Feb 2023 19:44:57 +0100 Subject: net: introduce default_rps_mask netns attribute If RPS is enabled, this allows configuring a default rps mask, which is effective since receive queue creation time. A default RPS mask allows the system admin to ensure proper isolation, avoiding races at network namespace or device creation time. The default RPS mask is initially empty, and can be modified via a newly added sysctl entry. Signed-off-by: Paolo Abeni Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d5ef4c1fedd2..38ab96ae0d68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -223,6 +223,7 @@ struct net_device_core_stats { #include extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; +extern struct cpumask rps_default_mask; #endif struct neighbour; -- cgit v1.2.3 From 17ce252266c7f016ece026492c45838f852ddc79 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 19 Jan 2023 08:32:05 -0800 Subject: dmaengine: xilinx: xdma: Add xilinx xdma driver Add driver to enable PCIe board which uses XDMA (the DMA/Bridge Subsystem for PCI Express). For example, Xilinx Alveo PCIe devices. https://www.xilinx.com/products/boards-and-kits/alveo.html The XDMA engine support up to 4 Host to Card (H2C) and 4 Card to Host (C2H) channels. Memory transfers are specified on a per-channel basis in descriptor linked lists, which the DMA fetches from host memory and processes. Events such as descriptor completion and errors are signaled using interrupts. The hardware detail is provided by https://docs.xilinx.com/r/en-US/pg195-pcie-dma/Introduction This driver implements dmaengine APIs. - probe the available DMA channels - use dma_slave_map for channel lookup - use virtual channel to manage dmaengine tx descriptors - implement device_prep_slave_sg callback to handle host scatter gather list - implement device_config to config device address for DMA transfer Signed-off-by: Lizhi Hou Signed-off-by: Sonal Santan Signed-off-by: Max Zhen Signed-off-by: Brian Xu Tested-by: Martin Tuma Link: https://lore.kernel.org/r/1674145926-29449-2-git-send-email-lizhi.hou@amd.com Signed-off-by: Vinod Koul --- include/linux/platform_data/amd_xdma.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/platform_data/amd_xdma.h (limited to 'include/linux') diff --git a/include/linux/platform_data/amd_xdma.h b/include/linux/platform_data/amd_xdma.h new file mode 100644 index 000000000000..b5e23e14bac8 --- /dev/null +++ b/include/linux/platform_data/amd_xdma.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2022, Advanced Micro Devices, Inc. + */ + +#ifndef _PLATDATA_AMD_XDMA_H +#define _PLATDATA_AMD_XDMA_H + +#include + +/** + * struct xdma_chan_info - DMA channel information + * This information is used to match channel when request dma channel + * @dir: Channel transfer direction + */ +struct xdma_chan_info { + enum dma_transfer_direction dir; +}; + +#define XDMA_FILTER_PARAM(chan_info) ((void *)(chan_info)) + +struct dma_slave_map; + +/** + * struct xdma_platdata - platform specific data for XDMA engine + * @max_dma_channels: Maximum dma channels in each direction + */ +struct xdma_platdata { + u32 max_dma_channels; + u32 device_map_cnt; + struct dma_slave_map *device_map; +}; + +#endif /* _PLATDATA_AMD_XDMA_H */ -- cgit v1.2.3 From ecf294a6f63f882319485f807754dacaeae96e9d Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 19 Jan 2023 08:32:06 -0800 Subject: dmaengine: xilinx: xdma: Add user logic interrupt support The Xilinx DMA/Bridge Subsystem for PCIe (XDMA) provides up to 16 user interrupt wires to user logic that generate interrupts to the host. This patch adds APIs to enable/disable user logic interrupt for a given interrupt wire index. Signed-off-by: Lizhi Hou Signed-off-by: Sonal Santan Signed-off-by: Max Zhen Signed-off-by: Brian Xu Tested-by: Martin Tuma Link: https://lore.kernel.org/r/1674145926-29449-3-git-send-email-lizhi.hou@amd.com Signed-off-by: Vinod Koul --- include/linux/dma/amd_xdma.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/dma/amd_xdma.h (limited to 'include/linux') diff --git a/include/linux/dma/amd_xdma.h b/include/linux/dma/amd_xdma.h new file mode 100644 index 000000000000..ceba69ed7cb4 --- /dev/null +++ b/include/linux/dma/amd_xdma.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2022, Advanced Micro Devices, Inc. + */ + +#ifndef _DMAENGINE_AMD_XDMA_H +#define _DMAENGINE_AMD_XDMA_H + +#include +#include + +int xdma_enable_user_irq(struct platform_device *pdev, u32 irq_num); +void xdma_disable_user_irq(struct platform_device *pdev, u32 irq_num); +int xdma_get_user_irq(struct platform_device *pdev, u32 user_irq_index); + +#endif /* _DMAENGINE_AMD_XDMA_H */ -- cgit v1.2.3 From 8b5443102cfca7a4346a50918f8ecc8a1644ea2e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 30 Jan 2023 13:05:03 +0200 Subject: dmaengine: Make an order in struct dma_device definition Make an order in struct dma_device: - added missing kernel doc descriptions - put descriptions in the order of appearance in the code - updated indentation where it makes sense Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230130110503.52250-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c923f4e60f24..bcfece892812 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -773,6 +773,7 @@ struct dma_filter { /** * struct dma_device - info on the entity supplying DMA services + * @ref: reference is taken and put every time a channel is allocated or freed * @chancnt: how many DMA channels are supported * @privatecnt: how many DMA channels are requested by dma_request_channel * @channels: the list of struct dma_chan @@ -789,6 +790,7 @@ struct dma_filter { * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @owner: owner module (automatically set based on the provided dev) + * @chan_ida: unique channel ID * @src_addr_widths: bit mask of src addr widths the device supports * Width is specified in bytes, e.g. for a device supporting * a width of 4 the mask should have BIT(4) set. @@ -802,6 +804,7 @@ struct dma_filter { * @max_sg_burst: max number of SG list entries executed in a single burst * DMA tansaction with no software intervention for reinitialization. * Zero value means unlimited number of entries. + * @descriptor_reuse: a submitted transfer can be resubmitted after completion * @residue_granularity: granularity of the transfer residue reported * by tx_status * @device_alloc_chan_resources: allocate resources and return the @@ -839,7 +842,6 @@ struct dma_filter { * struct with auxiliary transfer status information, otherwise the call * will just return a simple status code * @device_issue_pending: push pending transactions to hardware - * @descriptor_reuse: a submitted transfer can be resubmitted after completion * @device_release: called sometime atfer dma_async_device_unregister() is * called and there are no further references to this structure. This * must be implemented to free resources however many existing drivers @@ -847,6 +849,7 @@ struct dma_filter { * @dbg_summary_show: optional routine to show contents in debugfs; default code * will be used when this is omitted, but custom code can show extra, * controller specific information. + * @dbg_dev_root: the root folder in debugfs for this device */ struct dma_device { struct kref ref; @@ -855,7 +858,7 @@ struct dma_device { struct list_head channels; struct list_head global_node; struct dma_filter filter; - dma_cap_mask_t cap_mask; + dma_cap_mask_t cap_mask; enum dma_desc_metadata_mode desc_metadata_modes; unsigned short max_xor; unsigned short max_pq; @@ -924,10 +927,8 @@ struct dma_device { struct dma_chan *chan, dma_addr_t dst, u64 data, unsigned long flags); - void (*device_caps)(struct dma_chan *chan, - struct dma_slave_caps *caps); - int (*device_config)(struct dma_chan *chan, - struct dma_slave_config *config); + void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps); + int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); int (*device_pause)(struct dma_chan *chan); int (*device_resume)(struct dma_chan *chan); int (*device_terminate_all)(struct dma_chan *chan); -- cgit v1.2.3 From f1db99c07b4f0a9edc45f3498d4d13d13a41836a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Feb 2023 15:31:51 +0200 Subject: string_helpers: Move string_is_valid() to the header Move string_is_valid() to the header for wider use. While at it, rename to string_is_terminated() to be precise about its semantics. Signed-off-by: Andy Shevchenko Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20230208133153.22528-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- include/linux/string_helpers.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 8530c7328269..fae6beaaa217 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -11,6 +11,11 @@ struct device; struct file; struct task_struct; +static inline bool string_is_terminated(const char *s, int len) +{ + return memchr(s, '\0', len) ? true : false; +} + /* Descriptions of the types of units to * print in */ enum string_size_units { -- cgit v1.2.3 From 025a785ff083729819dc82ac81baf190cb4aee5c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Feb 2023 22:06:42 -0800 Subject: net: skbuff: drop the word head from skb cache skbuff_head_cache is misnamed (perhaps for historical reasons?) because it does not hold heads. Head is the buffer which skb->data points to, and also where shinfo lives. struct sk_buff is a metadata structure, not the head. Eric recently added skb_small_head_cache (which allocates actual head buffers), let that serve as an excuse to finally clean this up :) Leave the user-space visible name intact, it could possibly be uAPI. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c3df3b55da97..47ab28a37f2f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1243,7 +1243,7 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_head_cache; +extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, -- cgit v1.2.3 From 8c99377e614f8abfd881c34611002b2af5ab1ee8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Feb 2023 10:35:56 +0100 Subject: driver core: bus: add bus_get_dev_root() function Instead of poking around in the struct bus_type directly for the dev_root pointer, provide a function to return it properly reference counted, if it is present in the bus. This will be needed to move the pointer out of struct bus_type in the future. Use the function in the driver core code at the same time it is introduced to verify that it works properly. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230209093556.19132-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 31be18608f83..6ce32ef4b8fd 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -282,5 +282,6 @@ enum bus_notifier_event { }; extern struct kset *bus_get_kset(const struct bus_type *bus); +struct device *bus_get_dev_root(const struct bus_type *bus); #endif -- cgit v1.2.3 From 3efc61d95259956db25347e2a9562c3e54546e20 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 29 Jan 2023 09:28:56 +0100 Subject: fbdev: Fix invalid page access after closing deferred I/O devices When a fbdev with deferred I/O is once opened and closed, the dirty pages still remain queued in the pageref list, and eventually later those may be processed in the delayed work. This may lead to a corruption of pages, hitting an Oops. This patch makes sure to cancel the delayed work and clean up the pageref list at closing the device for addressing the bug. A part of the cleanup code is factored out as a new helper function that is called from the common fb_release(). Reviewed-by: Patrik Jakobsson Cc: Signed-off-by: Takashi Iwai Tested-by: Miko Larsson Fixes: 56c134f7f1b5 ("fbdev: Track deferred-I/O pages in pageref struct") Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230129082856.22113-1-tiwai@suse.de --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 96b96323e9cb..73eb1f85ea8e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -662,6 +662,7 @@ extern int fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); +extern void fb_deferred_io_release(struct fb_info *info); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From 67c7debbfc3b1ccfe18e10b7e7663737f7dff0e3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Feb 2023 16:52:38 +0200 Subject: platform/x86: Fix header inclusion in linux/platform_data/x86/soc.h First of all, we don't use intel-family.h directly. On the other hand we actively use boolean type, that is defined in the types.h (we take top-level header for that) and x86_cpu_id, that is provided in the mod_devicetable.h. Secondly, we don't need to spread SOC_INTEL_IS_CPU() macro to the users. Hence, undefine it when it's appropriate. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230206145238.19460-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/soc.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/soc.h b/include/linux/platform_data/x86/soc.h index da05f425587a..a5705189e2ac 100644 --- a/include/linux/platform_data/x86/soc.h +++ b/include/linux/platform_data/x86/soc.h @@ -8,10 +8,13 @@ #ifndef __PLATFORM_DATA_X86_SOC_H #define __PLATFORM_DATA_X86_SOC_H +#include + #if IS_ENABLED(CONFIG_X86) +#include + #include -#include #define SOC_INTEL_IS_CPU(soc, type) \ static inline bool soc_intel_is_##soc(void) \ @@ -34,6 +37,8 @@ SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); SOC_INTEL_IS_CPU(cml, KABYLAKE_L); +#undef SOC_INTEL_IS_CPU + #else /* IS_ENABLED(CONFIG_X86) */ static inline bool soc_intel_is_byt(void) -- cgit v1.2.3 From 6e80133abeb09721ec4601de5b1e68be67135309 Mon Sep 17 00:00:00 2001 From: William Zhang Date: Thu, 9 Feb 2023 12:02:39 -0800 Subject: spi: export spi_transfer_cs_change_delay_exec function For SPI controller that implements transfer_one_message, it needs to insert the delay that required by cs change event between the transfers. Add a wrapper for the local function _spi_transfer_cs_change_delay_exec and export it for SPI controller driver to use. Signed-off-by: William Zhang Link: https://lore.kernel.org/r/20230209200246.141520-9-william.zhang@broadcom.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 9b23a1d0dd0d..88cb2a1390ce 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -26,6 +26,7 @@ struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; struct spi_controller_mem_caps; +struct spi_message; /* * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, @@ -119,6 +120,8 @@ struct spi_delay { extern int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer); extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); +extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, + struct spi_transfer *xfer); /** * struct spi_device - Controller side proxy for an SPI slave device @@ -283,8 +286,6 @@ static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_d spi->cs_gpiod = csgpiod; } -struct spi_message; - /** * struct spi_driver - Host side "protocol" driver * @id_table: List of SPI devices supported by this driver -- cgit v1.2.3 From 53c0e2f9b808fafaea7bec91eeec48046bcaaba0 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:14:03 +0300 Subject: dmaengine: dw-edma: Replace chip ID number with device name Using an abstract number as the DW eDMA chip identifier isn't practical because there can be more than one DW eDMA controller on the platform. Some may be detected as the PCIe Endpoints, and others may be embedded in DW PCIe Root Port/Endpoint controllers. An abstract number in, for instance, the IRQ handlers list, doesn't give a notion regarding their reference to the particular DMA controller. To preserve the code simplicity and support multi-eDMA platforms, use the parental device name to create the DW eDMA controller name. Link: https://lore.kernel.org/r/20230113171409.30470-22-Sergey.Semin@baikalelectronics.ru Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dma/edma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 07a23ecc834f..96bfd0173f3a 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -76,7 +76,6 @@ enum dw_edma_chip_flags { */ struct dw_edma_chip { struct device *dev; - int id; int nr_irqs; const struct dw_edma_core_ops *ops; u32 flags; -- cgit v1.2.3 From 93c177fd6ff0655a5fa43ec945a57d7b0200ad80 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:21 -0800 Subject: kernel/range: Uplevel the cxl subsystem's range_contains() helper In support of the CXL subsystem's use of 'struct range' to track decode address ranges, add a common range_contains() implementation with identical semantics as resource_contains(); The existing 'range_contains()' in lib/stackinit_kunit.c is namespaced with a 'stackinit_' prefix. Cc: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601998163.1924368.6067392174077323935.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/range.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/range.h b/include/linux/range.h index 274681cc3154..7efb6a9b069b 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,6 +13,11 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } +static inline bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); -- cgit v1.2.3 From fe098574a93b4e2acb046b583e9857337d807f38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:02 -0800 Subject: dax/hmem: Convey the dax range via memregion_info() In preparation for hmem platform devices to be unregistered, stop using platform_device_add_resources() to convey the address range. The platform_device_add_resources() API causes an existing "Soft Reserved" iomem resource to be re-parented under an inserted platform device resource. When that platform device is deleted it removes the platform device resource and all children. Instead, it is sufficient to convey just the address range and let request_mem_region() insert resources to indicate the devices active in the range. This allows the "Soft Reserved" resource to be re-enumerated upon the next probe event. Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602002217.1924368.7036275892522551624.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/memregion.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memregion.h b/include/linux/memregion.h index bf83363807ac..c01321467789 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,10 +3,12 @@ #define _MEMREGION_H_ #include #include +#include #include struct memregion_info { int target_node; + struct range range; }; #ifdef CONFIG_MEMREGION -- cgit v1.2.3 From 7dab174e2e27eeaf10273e597ffbef4f8ea032bb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:07 -0800 Subject: dax/hmem: Move hmem device registration to dax_hmem.ko In preparation for the CXL region driver to take over the responsibility of registering device-dax instances for CXL regions, move the registration of "hmem" devices to dax_hmem.ko. Previously the builtin component of this enabling (drivers/dax/hmem/device.o) would register platform devices for each address range and trigger the dax_hmem.ko module to load and attach device-dax instances to those devices. Now, the ranges are collected from the HMAT and EFI memory map walking, but the device creation is deferred. A new "hmem_platform" device is created which triggers dax_hmem.ko to load and register the platform devices. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602002771.1924368.5653558226424530127.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/dax.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 2b5ecb591059..bf6258472e49 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -262,11 +262,14 @@ static inline bool dax_mapping(struct address_space *mapping) } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES -void hmem_register_device(int target_nid, struct resource *r); +void hmem_register_resource(int target_nid, struct resource *r); #else -static inline void hmem_register_device(int target_nid, struct resource *r) +static inline void hmem_register_resource(int target_nid, struct resource *r) { } #endif +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, + const struct resource *res); +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); #endif -- cgit v1.2.3 From b6c1a8af5b1eec42aabc13376f94aa90c3d765f1 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 10 Feb 2023 15:47:31 +0000 Subject: mm: memcontrol: add new kernel parameter cgroup.memory=nobpf Add new kernel parameter cgroup.memory=nobpf to allow user disable bpf memory accounting. This is a preparation for the followup patch. Signed-off-by: Yafang Shao Acked-by: Johannes Weiner Acked-by: Roman Gushchin Link: https://lore.kernel.org/r/20230210154734.4416-2-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/memcontrol.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 85dc9b88ea37..1e38e99998c7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1754,6 +1754,12 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page); int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); +extern struct static_key_false memcg_bpf_enabled_key; +static inline bool memcg_bpf_enabled(void) +{ + return static_branch_likely(&memcg_bpf_enabled_key); +} + extern struct static_key_false memcg_kmem_enabled_key; static inline bool memcg_kmem_enabled(void) @@ -1832,6 +1838,11 @@ static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) return NULL; } +static inline bool memcg_bpf_enabled(void) +{ + return false; +} + static inline bool memcg_kmem_enabled(void) { return false; -- cgit v1.2.3 From ddef81b5fd1da4d7c3cc8785d2043b73b72f38ef Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 10 Feb 2023 15:47:32 +0000 Subject: bpf: use bpf_map_kvcalloc in bpf_local_storage Introduce new helper bpf_map_kvcalloc() for the memory allocation in bpf_local_storage(). Then the allocation will charge the memory from the map instead of from current, though currently they are the same thing as it is only used in map creation path now. By charging map's memory into the memcg from the map, it will be more clear. Signed-off-by: Yafang Shao Acked-by: Johannes Weiner Acked-by: Roman Gushchin Link: https://lore.kernel.org/r/20230210154734.4416-3-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 35c18a98c21a..fe0bf482fdf8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1886,6 +1886,8 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); +void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, + gfp_t flags); void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else @@ -1902,6 +1904,12 @@ bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) return kzalloc(size, flags); } +static inline void * +bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags) +{ + return kvcalloc(n, size, flags); +} + static inline void __percpu * bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags) -- cgit v1.2.3 From ee53cbfb1ebf990de0d084a7cd6b67b05fe1f7ac Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 10 Feb 2023 15:47:33 +0000 Subject: bpf: allow to disable bpf map memory accounting We can simply set root memcg as the map's memcg to disable bpf memory accounting. bpf_map_area_alloc is a little special as it gets the memcg from current rather than from the map, so we need to disable GFP_ACCOUNT specifically for it. Signed-off-by: Yafang Shao Acked-by: Johannes Weiner Acked-by: Roman Gushchin Link: https://lore.kernel.org/r/20230210154734.4416-4-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fe0bf482fdf8..4385418118f6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -28,6 +28,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -2933,4 +2934,11 @@ static inline bool type_is_alloc(u32 type) return type & MEM_ALLOC; } +static inline gfp_t bpf_memcg_flags(gfp_t flags) +{ + if (memcg_bpf_enabled()) + return flags | __GFP_ACCOUNT; + return flags; +} + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From ffb1b4a41016295e298409c9dbcacd55680bd6d4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 10 Feb 2023 14:42:01 -0800 Subject: x86/unwind/orc: Add 'signal' field to ORC metadata Add a 'signal' field which allows unwind hints to specify whether the instruction pointer should be taken literally (like for most interrupts and exceptions) rather than decremented (like for call stack return addresses) when used to find the next ORC entry. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/d2c5ec4d83a45b513d8fd72fab59f1a8cfa46871.1676068346.git.jpoimboe@kernel.org --- include/linux/objtool.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 62c54ffbeeaa..9ac3df3fccf0 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -15,6 +15,7 @@ struct unwind_hint { s16 sp_offset; u8 sp_reg; u8 type; + u8 signal; u8 end; }; #endif @@ -49,7 +50,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "987: \n\t" \ ".pushsection .discard.unwind_hints\n\t" \ /* struct unwind_hint */ \ @@ -57,6 +58,7 @@ struct unwind_hint { ".short " __stringify(sp_offset) "\n\t" \ ".byte " __stringify(sp_reg) "\n\t" \ ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(signal) "\n\t" \ ".byte " __stringify(end) "\n\t" \ ".balign 4 \n\t" \ ".popsection\n\t" @@ -129,7 +131,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -137,6 +139,7 @@ struct unwind_hint { .short \sp_offset .byte \sp_reg .byte \type + .byte \signal .byte \end .balign 4 .popsection @@ -174,7 +177,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) @@ -182,7 +185,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -- cgit v1.2.3 From b6c7abd1c28a63ad633433d037ee15a1bc3023ba Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 12 Feb 2023 15:13:03 +0000 Subject: tracing: Fix TASK_COMM_LEN in trace event format file After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"), the content of the format file under /sys/kernel/tracing/events/task/task_newtask was changed from field:char comm[16]; offset:12; size:16; signed:0; to field:char comm[TASK_COMM_LEN]; offset:12; size:16; signed:0; John reported that this change breaks older versions of perfetto. Then Mathieu pointed out that this behavioral change was caused by the use of __stringify(_len), which happens to work on macros, but not on enum labels. And he also gave the suggestion on how to fix it: :One possible solution to make this more robust would be to extend :struct trace_event_fields with one more field that indicates the length :of an array as an actual integer, without storing it in its stringified :form in the type, and do the formatting in f_show where it belongs. The result as follows after this change, $ cat /sys/kernel/tracing/events/task/task_newtask/format field:char comm[16]; offset:12; size:16; signed:0; Link: https://lore.kernel.org/lkml/Y+QaZtz55LIirsUO@google.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230210155921.4610-1-laoar.shao@gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230212151303.12353-1-laoar.shao@gmail.com Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Kajetan Puchalski CC: Qais Yousef Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN") Reported-by: John Stultz Debugged-by: Mathieu Desnoyers Suggested-by: Mathieu Desnoyers Suggested-by: Steven Rostedt Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 4342e996bcdb..0e373222a6df 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -270,6 +270,7 @@ struct trace_event_fields { const int align; const int is_signed; const int filter_type; + const int len; }; int (*define_fields)(struct trace_event_call *); }; -- cgit v1.2.3 From 6c1976addf3623e979b7a954e216004d559bcb42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 9 Jan 2023 23:59:42 +0000 Subject: KEYS: Add new function key_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit key_create() works like key_create_or_update() but does not allow updating an existing key, instead returning ERR_PTR(-EEXIST). key_create() will be used by the blacklist keyring which should not create duplicate entries or update existing entries. Instead a dedicated message with appropriate severity will be logged. Signed-off-by: Thomas Weißschuh Signed-off-by: Jarkko Sakkinen --- include/linux/key.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index d27477faf00d..8dc7f7c3088b 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -386,6 +386,14 @@ extern int wait_for_key_construction(struct key *key, bool intr); extern int key_validate(const struct key *key); +extern key_ref_t key_create(key_ref_t keyring, + const char *type, + const char *description, + const void *payload, + size_t plen, + key_perm_t perm, + unsigned long flags); + extern key_ref_t key_create_or_update(key_ref_t keyring, const char *type, const char *description, -- cgit v1.2.3 From 85b93bbd1c9768d09adebbe9f33bab0d4ec94404 Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Wed, 8 Feb 2023 11:58:36 -0800 Subject: tpm: add vendor flag to command code validation Some TPM 2.0 devices have support for additional commands which are not part of the TPM 2.0 specifications. These commands are identified with bit 29 of the 32 bits command codes. Contrarily to other fields of the TPMA_CC spec structure used to list available commands, the Vendor flag also has to be present in the command code itself (TPM_CC) when called. Add this flag to tpm_find_cc() mask to prevent blocking vendor command codes that can actually be supported by the underlying TPM device. Signed-off-by: Julien Gomes Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index dfeb25a0362d..4dc97b9f65fb 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -265,6 +265,7 @@ enum tpm2_startup_types { enum tpm2_cc_attrs { TPM2_CC_ATTR_CHANDLES = 25, TPM2_CC_ATTR_RHANDLE = 28, + TPM2_CC_ATTR_VENDOR = 29, }; #define TPM_VID_INTEL 0x8086 -- cgit v1.2.3 From c35e03eaece71101ff6cbf776b86403860ac8cc3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 31 Jan 2023 16:01:45 +0800 Subject: crypto: api - Add scaffolding to change completion function signature The crypto completion function currently takes a pointer to a struct crypto_async_request object. However, in reality the API does not allow the use of any part of the object apart from the data field. For example, ahash/shash will create a fake object on the stack to pass along a different data field. This leads to potential bugs where the user may try to dereference or otherwise use the crypto_async_request object. This patch adds some temporary scaffolding so that the completion function can take a void * instead. Once affected users have been converted this can be removed. The helper crypto_request_complete will remain even after the conversion is complete. It should be used instead of calling the completion function directly. Signed-off-by: Herbert Xu Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- include/linux/crypto.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 5d1e961f810e..b18f6e669fb1 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -176,6 +176,7 @@ struct crypto_async_request; struct crypto_tfm; struct crypto_type; +typedef struct crypto_async_request crypto_completion_data_t; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); /** @@ -595,6 +596,11 @@ struct crypto_wait { /* * Async ops completion helper functioons */ +static inline void *crypto_get_completion_data(crypto_completion_data_t *req) +{ + return req->data; +} + void crypto_req_done(struct crypto_async_request *req, int err); static inline int crypto_wait_req(int err, struct crypto_wait *wait) -- cgit v1.2.3 From 255e48eb17684157336bd6dd98d22c1b2d9e3f43 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Feb 2023 13:58:44 +0800 Subject: crypto: api - Use data directly in completion function This patch does the final flag day conversion of all completion functions which are now all contained in the Crypto API. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b18f6e669fb1..80f6350fb588 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -176,8 +176,8 @@ struct crypto_async_request; struct crypto_tfm; struct crypto_type; -typedef struct crypto_async_request crypto_completion_data_t; -typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); +typedef void crypto_completion_data_t; +typedef void (*crypto_completion_t)(void *req, int err); /** * DOC: Block Cipher Context Data Structures @@ -596,12 +596,12 @@ struct crypto_wait { /* * Async ops completion helper functioons */ -static inline void *crypto_get_completion_data(crypto_completion_data_t *req) +static inline void *crypto_get_completion_data(void *data) { - return req->data; + return data; } -void crypto_req_done(struct crypto_async_request *req, int err); +void crypto_req_done(void *req, int err); static inline int crypto_wait_req(int err, struct crypto_wait *wait) { -- cgit v1.2.3 From 846366b0df0ad19051fbdd38734b1f7690d814c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 6 Feb 2023 18:22:46 +0800 Subject: crypto: api - Remove completion function scaffolding This patch removes the temporary scaffolding now that the comletion function signature has been converted. Signed-off-by: Herbert Xu Acked-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- include/linux/crypto.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 80f6350fb588..bb1d9b0e1647 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -176,7 +176,6 @@ struct crypto_async_request; struct crypto_tfm; struct crypto_type; -typedef void crypto_completion_data_t; typedef void (*crypto_completion_t)(void *req, int err); /** @@ -596,11 +595,6 @@ struct crypto_wait { /* * Async ops completion helper functioons */ -static inline void *crypto_get_completion_data(void *data) -{ - return data; -} - void crypto_req_done(void *req, int err); static inline int crypto_wait_req(int err, struct crypto_wait *wait) -- cgit v1.2.3 From 5b4e9a7a71ab912d150cb2276cb23af51c863150 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 10 Feb 2023 16:50:16 -0800 Subject: net: ethtool: extend ringparam set/get APIs for rx_push Similar to what was done for TX_PUSH, add an RX_PUSH concept to the ethtool interfaces. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 515c78d8eb7c..2792185dda22 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -73,12 +73,14 @@ enum { * @rx_buf_len: Current length of buffers on the rx ring. * @tcp_data_split: Scatter packet headers and data to separate buffers * @tx_push: The flag of tx push mode + * @rx_push: The flag of rx push mode * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; u8 tx_push; + u8 rx_push; u32 cqe_size; }; @@ -87,11 +89,13 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push + * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), ETHTOOL_RING_USE_CQE_SIZE = BIT(1), ETHTOOL_RING_USE_TX_PUSH = BIT(2), + ETHTOOL_RING_USE_RX_PUSH = BIT(3), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) -- cgit v1.2.3 From 26917eab144c8515435ef9175fdd5dddf9f0f000 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 8 Feb 2023 08:33:27 +0200 Subject: platform_data/mlxreg: Add field with mapped resource address Add field with PCIe remapped based address for passing it across relevant platform drivers sharing common system resources. Signed-off-by: Vadim Pasternak Reviewed-by: Michael Shych Link: https://lore.kernel.org/r/20230208063331.15560-11-vadimp@nvidia.com Signed-off-by: Hans de Goede --- include/linux/platform_data/mlxreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index a6bd74e29b6b..0b9f81a6f753 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -216,6 +216,7 @@ struct mlxreg_core_platform_data { * @mask_low: low aggregation interrupt common mask; * @deferred_nr: I2C adapter number must be exist prior probing execution; * @shift_nr: I2C adapter numbers must be incremented by this value; + * @addr: mapped resource address; * @handle: handle to be passed by callback; * @completion_notify: callback to notify when platform driver probing is done; */ @@ -230,6 +231,7 @@ struct mlxreg_core_hotplug_platform_data { u32 mask_low; int deferred_nr; int shift_nr; + void __iomem *addr; void *handle; int (*completion_notify)(void *handle, int id); }; -- cgit v1.2.3 From 14e47d1fb8f9596acc90a06a66808657a9c512b5 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sat, 11 Feb 2023 08:41:06 +0100 Subject: net: phy: add genphy_c45_read_eee_abilities() function Add generic function for EEE abilities defined by IEEE 802.3 specification. For now following registers are supported: - IEEE 802.3-2018 45.2.3.10 EEE control and capability 1 (Register 3.20) - IEEE 802.3cg-2019 45.2.1.186b 10BASE-T1L PMA status register (Register 1.2295) Since I was not able to find any flag signaling support of these registers, we should detect link mode abilities first and then based on these abilities doing EEE link modes detection. Results of EEE ability detection will be stored into new variable phydev->supported_eee. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 26 ++++++++++++++++++++++++++ include/linux/phy.h | 6 ++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index c0da30d63b1d..e75583f5d967 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -402,6 +402,32 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) return result; } +/** + * mii_eee_cap1_mod_linkmode_t() + * @adv: target the linkmode advertisement settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20) + * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60) + * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61) + */ +static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + adv, val & MDIO_EEE_100TX); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + adv, val & MDIO_EEE_1000T); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + adv, val & MDIO_EEE_10GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + adv, val & MDIO_EEE_1000KX); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + adv, val & MDIO_EEE_10GKX4); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + adv, val & MDIO_EEE_10GKR); +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, diff --git a/include/linux/phy.h b/include/linux/phy.h index fbeba4fee8d4..c183a8a27986 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -52,6 +52,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) @@ -62,6 +63,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) #define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) +#define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; @@ -572,6 +574,7 @@ struct macsec_ops; * @supported: Combined MAC/PHY supported linkmodes * @advertising: Currently advertised linkmodes * @adv_old: Saved advertised while power saving for WoL + * @supported_eee: supported PHY EEE linkmodes * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -676,6 +679,8 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); + /* used for eee validation */ + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); /* Host supported PHY interface types. Should be ignored if empty. */ DECLARE_PHY_INTERFACE_MASK(host_interfaces); @@ -1737,6 +1742,7 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); +int genphy_c45_read_eee_abilities(struct phy_device *phydev); int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_baset1_read_status(struct phy_device *phydev); -- cgit v1.2.3 From cf9f6079696840093aa6ea3c0ee405a553afe2fb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sat, 11 Feb 2023 08:41:08 +0100 Subject: net: phy: export phy_check_valid() function This function will be needed for genphy_c45_ethtool_get_eee() provided by next patch. Signed-off-by: Oleksij Rempel Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index c183a8a27986..7a8e541de3f3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1619,6 +1619,7 @@ int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); +bool phy_check_valid(int speed, int duplex, unsigned long *features); int phy_restart_aneg(struct phy_device *phydev); int phy_reset_after_clk_enable(struct phy_device *phydev); -- cgit v1.2.3 From 022c3f87f88e2d68e90be7687d981c9cb893a3b1 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sat, 11 Feb 2023 08:41:09 +0100 Subject: net: phy: add genphy_c45_ethtool_get/set_eee() support Add replacement for phy_ethtool_get/set_eee() functions. Current phy_ethtool_get/set_eee() implementation is great and it is possible to make it even better: - this functionality is for devices implementing parts of IEEE 802.3 specification beyond Clause 22. The better place for this code is phy-c45.c - currently it is able to do read/write operations on PHYs with different abilities to not existing registers. It is better to use stored supported_eee abilities to avoid false read/write operations. - the eee_active detection will provide wrong results on not supported link modes. It is better to validate speed/duplex properties against supported EEE link modes. - it is able to support only limited amount of link modes. We have more EEE link modes... By refactoring this code I address most of this point except of the last one. Adding additional EEE link modes will need more work. Signed-off-by: Oleksij Rempel Signed-off-by: David S. Miller --- include/linux/mdio.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 7 +++++++ 2 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index e75583f5d967..27013d6bf24a 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -428,6 +428,64 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) adv, val & MDIO_EEE_10GKR); } +/** + * linkmode_to_mii_eee_cap1_t() + * @adv: the linkmode advertisement settings + * + * A function that translates linkmode to value for IEEE 802.3-2018 45.2.7.13 + * "EEE advertisement 1" register (7.60) + */ +static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, adv)) + result |= MDIO_EEE_100TX; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, adv)) + result |= MDIO_EEE_1000T; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, adv)) + result |= MDIO_EEE_10GT; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, adv)) + result |= MDIO_EEE_1000KX; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, adv)) + result |= MDIO_EEE_10GKX4; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, adv)) + result |= MDIO_EEE_10GKR; + + return result; +} + +/** + * mii_10base_t1_adv_mod_linkmode_t() + * @adv: linkmode advertisement settings + * @val: register value + * + * A function that translates IEEE 802.3cg-2019 45.2.7.26 "10BASE-T1 AN status" + * register (7.527) value to the linkmode. + */ +static inline void mii_10base_t1_adv_mod_linkmode_t(unsigned long *adv, u16 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, + adv, val & MDIO_AN_10BT1_AN_CTRL_ADV_EEE_T1L); +} + +/** + * linkmode_adv_to_mii_10base_t1_t() + * @adv: linkmode advertisement settings + * + * A function that translates the linkmode to IEEE 802.3cg-2019 45.2.7.25 + * "10BASE-T1 AN control" register (7.526) value. + */ +static inline u32 linkmode_adv_to_mii_10base_t1_t(unsigned long *adv) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, adv)) + result |= MDIO_AN_10BT1_AN_CTRL_ADV_EEE_T1L; + + return result; +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, diff --git a/include/linux/phy.h b/include/linux/phy.h index 7a8e541de3f3..727bff531a14 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1758,6 +1758,13 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev, const struct phy_plca_cfg *plca_cfg); int genphy_c45_plca_get_status(struct phy_device *phydev, struct phy_plca_status *plca_st); +int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, + unsigned long *lp, bool *is_enabled); +int genphy_c45_ethtool_get_eee(struct phy_device *phydev, + struct ethtool_eee *data); +int genphy_c45_ethtool_set_eee(struct phy_device *phydev, + struct ethtool_eee *data); +int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 2b129f0b24ab578b02901b4a1744b7f97399faa0 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 9 Feb 2023 13:49:00 -0800 Subject: PCI/MSI: Provide missing stubs for CONFIG_PCI_MSI=n pci_msix_alloc_irq_at() and pci_msix_free_irq() are not declared when CONFIG_PCI_MSI is disabled. Users of these two calls do not yet exist but when users do appear (shown below is an attempt to use the new API in vfio-pci) the following errors will be encountered when compiling with CONFIG_PCI_MSI disabled: drivers/vfio/pci/vfio_pci_intrs.c:461:4: error: implicit declaration of\ function 'pci_msix_free_irq' is invalid in C99\ [-Werror,-Wimplicit-function-declaration] pci_msix_free_irq(pdev, msix_map); ^ drivers/vfio/pci/vfio_pci_intrs.c:511:15: error: implicit declaration of\ function 'pci_msix_alloc_irq_at' is invalid in C99\ [-Werror,-Wimplicit-function-declaration] msix_map = pci_msix_alloc_irq_at(pdev, vector, NULL); Provide definitions for pci_msix_alloc_irq_at() and pci_msix_free_irq() in preparation for users that need to compile when CONFIG_PCI_MSI is disabled. Reported-by: kernel test robot Fixes: 34026364df8e ("PCI/MSI: Provide post-enable dynamic allocation interfaces for MSI-X") Signed-off-by: Reinette Chatre Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/158e40e1cfcfc58ae30ecb2bbfaf86e5bba7a1ef.1675978686.git.reinette.chatre@intel.com --- include/linux/pci.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..254c8a4126a8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1621,6 +1621,18 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, flags, NULL); } +static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, + const struct irq_affinity_desc *affdesc) +{ + struct msi_map map = { .index = -ENOSYS, }; + + return map; +} + +static inline void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map) +{ +} + static inline void pci_free_irq_vectors(struct pci_dev *dev) { } -- cgit v1.2.3 From f3837ab7adbc1799a3c5648d34e3e27eb70709a6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 3 Feb 2023 20:06:32 -0800 Subject: highmem: Enhance is_kmap_addr() to check kmap_local_page() mappings is_kmap_addr() is only looking at the kmap() address range which may cause check_heap_object() to miss checking an overflow on a kmap_local_page() page. Add a check for the kmap_local_page() address range to is_kmap_addr(). Cc: Matthew Wilcox Cc: Al Viro Cc: "Fabio M. De Francesco" Cc: Thomas Gleixner Cc: Christoph Hellwig Cc: Andrew Morton Signed-off-by: Ira Weiny Acked-by: Andrew Morton Signed-off-by: Jens Wiklander --- include/linux/highmem-internal.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index e098f38422af..a3028e400a9c 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -152,7 +152,10 @@ static inline void totalhigh_pages_add(long count) static inline bool is_kmap_addr(const void *x) { unsigned long addr = (unsigned long)x; - return addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP); + + return (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) || + (addr >= __fix_to_virt(FIX_KMAP_END) && + addr < __fix_to_virt(FIX_KMAP_BEGIN)); } #else /* CONFIG_HIGHMEM */ -- cgit v1.2.3 From 816477edfba6e7ab9411acec5f07cfa00e0882f7 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 3 Feb 2023 20:06:35 -0800 Subject: mm: Remove get_kernel_pages() The only caller to get_kernel_pages() [shm_get_kernel_pages()] has been updated to not need it. Remove get_kernel_pages(). Cc: Mel Gorman Cc: Al Viro Cc: "Fabio M. De Francesco" Cc: Christoph Hellwig Cc: Andrew Morton Acked-by: John Hubbard Signed-off-by: Ira Weiny Acked-by: Andrew Morton Reviewed-by: Christoph Hellwig Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f857163ac89..2041e6d4fa27 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2095,8 +2095,6 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim); struct kvec; -int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, - struct page **pages); struct page *get_dump_page(unsigned long addr); bool folio_mark_dirty(struct folio *folio); -- cgit v1.2.3 From bba047f15851c8b053221f1b276eb7682d59f755 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 27 Jan 2023 15:02:00 +0100 Subject: wifi: mwifiex: Support SD8978 chipset The Marvell SD8978 (aka NXP IW416) uses identical registers as SD8987, so reuse the existing mwifiex_reg_sd8987 definition. Note that mwifiex_reg_sd8977 and mwifiex_reg_sd8997 are likewise identical, save for the fw_dump_ctrl register: They define it as 0xf0 whereas mwifiex_reg_sd8987 defines it as 0xf9. I've verified that 0xf9 is the correct value on SD8978. NXP's out-of-tree driver uses 0xf9 for all of them, so there's a chance that 0xf0 is not correct in the mwifiex_reg_sd8977 and mwifiex_reg_sd8997 definitions. I cannot test that for lack of hardware, hence am leaving it as is. NXP has only released a firmware which runs Bluetooth over UART. Perhaps Bluetooth over SDIO is unsupported by this chipset. Consequently, only an "sdiouart" firmware image is referenced, not an alternative "sdsd" image. Signed-off-by: Lukas Wunner Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/536b4f17a72ca460ad1b07045757043fb0778988.1674827105.git.lukas@wunner.de --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 74f9d9a6d330..0e4ef9c5127a 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -102,6 +102,7 @@ #define SDIO_DEVICE_ID_MARVELL_8977_BT 0x9146 #define SDIO_DEVICE_ID_MARVELL_8987_WLAN 0x9149 #define SDIO_DEVICE_ID_MARVELL_8987_BT 0x914a +#define SDIO_DEVICE_ID_MARVELL_8978_WLAN 0x9159 #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 -- cgit v1.2.3 From 41a337b40e983db4f0e1602308109f2b93687a06 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 13 Feb 2023 16:50:05 +0000 Subject: PM: Add EXPORT macros for exporting PM functions Add a pair of macros for exporting functions only if CONFIG_PM is enabled. The naming follows the style of the standard EXPORT_SYMBOL_*() macros that they replace. Sometimes a module wants to export PM functions directly to other drivers, not a complete struct dev_pm_ops. A typical example is where a core library exports the generic (shared) implementation and calling code wraps one or more of these in custom code. Signed-off-by: Richard Fitzgerald Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 93cd34f00822..035d9649eba4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -379,9 +379,13 @@ const struct dev_pm_ops name = { \ const struct dev_pm_ops name; \ __EXPORT_SYMBOL(name, sec, ns); \ const struct dev_pm_ops name +#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) #else #define _EXPORT_DEV_PM_OPS(name, sec, ns) \ static __maybe_unused const struct dev_pm_ops __static_##name +#define EXPORT_PM_FN_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) #endif #define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") -- cgit v1.2.3 From 47d1932f37de99bae3345bb93f098ac8750ab0fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Feb 2023 11:42:50 +0100 Subject: irqdomain: Drop revmap mutex The revmap mutex is essentially only used to maintain the integrity of the radix tree during updates (lookups use RCU). As the global irq_domain_mutex is now held in all paths that update the revmap structures there is strictly no longer any need for the dedicated mutex, which can be removed. Drop the revmap mutex and add lockdep assertions to the revmap helpers to make sure that the global lock is always held when updating the revmap. Tested-by: Hsin-Yi Wang Tested-by: Mark-PK Tsai Signed-off-by: Johan Hovold Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230213104302.17307-9-johan+linaro@kernel.org --- include/linux/irqdomain.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a372086750ca..16399de00b48 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -143,7 +143,6 @@ struct irq_domain_chip_generic; * Revmap data, used internally by the irq domain code: * @revmap_size: Size of the linear map table @revmap[] * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map - * @revmap_mutex: Lock for the revmap * @revmap: Linear table of irq_data pointers */ struct irq_domain { @@ -171,7 +170,6 @@ struct irq_domain { irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; - struct mutex revmap_mutex; struct irq_data __rcu *revmap[]; }; -- cgit v1.2.3 From 9dbb8e3452aba34e6fa4f63054b3adc66aceb7ec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Feb 2023 11:43:02 +0100 Subject: irqdomain: Switch to per-domain locking The IRQ domain structures are currently protected by the global irq_domain_mutex. Switch to using more fine-grained per-domain locking, which can speed up parallel probing by reducing lock contention. On a recent arm64 laptop, the total time spent waiting for the locks during boot drops from 160 to 40 ms on average, while the maximum aggregate wait time drops from 550 to 90 ms over ten runs for example. Note that the domain lock of the root domain (innermost domain) must be used for hierarchical domains. For non-hierarchical domains (as for root domains), the new root pointer is set to the domain itself so that &domain->root->mutex always points to the right lock. Also note that hierarchical domains should be constructed using irq_domain_create_hierarchy() (or irq_domain_add_hierarchy()) to avoid having racing allocations access a not fully initialised domain. As a safeguard, the lockdep assertion in irq_domain_set_mapping() will catch any offenders that also fail to set the root domain pointer. Tested-by: Hsin-Yi Wang Tested-by: Mark-PK Tsai Signed-off-by: Johan Hovold Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230213104302.17307-21-johan+linaro@kernel.org --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 16399de00b48..d320d15d4fba 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -125,6 +125,8 @@ struct irq_domain_chip_generic; * core code. * @flags: Per irq_domain flags * @mapcount: The number of mapped interrupts + * @mutex: Domain lock, hierarchical domains use root domain's lock + * @root: Pointer to root domain, or containing structure if non-hierarchical * * Optional elements: * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy @@ -152,6 +154,8 @@ struct irq_domain { void *host_data; unsigned int flags; unsigned int mapcount; + struct mutex mutex; + struct irq_domain *root; /* Optional data */ struct fwnode_handle *fwnode; -- cgit v1.2.3 From 05b8773ca33253ea562be145cf3145b05ef19f86 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 22 Dec 2022 19:33:31 +0000 Subject: mtd: ubi: block: wire-up device parent ubiblock devices were previously only identifyable by their name, but not connected to their parent UBI volume device e.g. in sysfs. Properly parent ubiblock device as descendant of a UBI volume device to reflect device model hierachy. Signed-off-by: Daniel Golle Signed-off-by: Richard Weinberger --- include/linux/mtd/ubi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 7d48ea368c5e..a529347fd75b 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -110,6 +110,7 @@ struct ubi_volume_info { int name_len; const char *name; dev_t cdev; + struct device *dev; }; /** -- cgit v1.2.3 From 6a3cd3318ff65622415e34e8ee39d76331e7c869 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sun, 12 Feb 2023 01:27:07 -0800 Subject: bpf: Migrate release_on_unlock logic to non-owning ref semantics This patch introduces non-owning reference semantics to the verifier, specifically linked_list API kfunc handling. release_on_unlock logic for refs is refactored - with small functional changes - to implement these semantics, and bpf_list_push_{front,back} are migrated to use them. When a list node is pushed to a list, the program still has a pointer to the node: n = bpf_obj_new(typeof(*n)); bpf_spin_lock(&l); bpf_list_push_back(&l, n); /* n still points to the just-added node */ bpf_spin_unlock(&l); What the verifier considers n to be after the push, and thus what can be done with n, are changed by this patch. Common properties both before/after this patch: * After push, n is only a valid reference to the node until end of critical section * After push, n cannot be pushed to any list * After push, the program can read the node's fields using n Before: * After push, n retains the ref_obj_id which it received on bpf_obj_new, but the associated bpf_reference_state's release_on_unlock field is set to true * release_on_unlock field and associated logic is used to implement "n is only a valid ref until end of critical section" * After push, n cannot be written to, the node must be removed from the list before writing to its fields * After push, n is marked PTR_UNTRUSTED After: * After push, n's ref is released and ref_obj_id set to 0. NON_OWN_REF type flag is added to reg's type, indicating that it's a non-owning reference. * NON_OWN_REF flag and logic is used to implement "n is only a valid ref until end of critical section" * n can be written to (except for special fields e.g. bpf_list_node, timer, ...) Summary of specific implementation changes to achieve the above: * release_on_unlock field, ref_set_release_on_unlock helper, and logic to "release on unlock" based on that field are removed * The anonymous active_lock struct used by bpf_verifier_state is pulled out into a named struct bpf_active_lock. * NON_OWN_REF type flag is introduced along with verifier logic changes to handle non-owning refs * Helpers are added to use NON_OWN_REF flag to implement non-owning ref semantics as described above * invalidate_non_owning_refs - helper to clobber all non-owning refs matching a particular bpf_active_lock identity. Replaces release_on_unlock logic in process_spin_lock. * ref_set_non_owning - set NON_OWN_REF type flag after doing some sanity checking * ref_convert_owning_non_owning - convert owning reference w/ specified ref_obj_id to non-owning references. Set NON_OWN_REF flag for each reg with that ref_obj_id and 0-out its ref_obj_id * Update linked_list selftests to account for minor semantic differences introduced by this patch * Writes to a release_on_unlock node ref are not allowed, while writes to non-owning reference pointees are. As a result the linked_list "write after push" failure tests are no longer scenarios that should fail. * The test##missing_lock##op and test##incorrect_lock##op macro-generated failure tests need to have a valid node argument in order to have the same error output as before. Otherwise verification will fail early and the expected error output won't be seen. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230212092715.1422619-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ include/linux/bpf_verifier.h | 38 ++++++++++++++++++-------------------- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4385418118f6..8b5d0b4c4ada 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -181,6 +181,7 @@ enum btf_field_type { BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, BPF_LIST_HEAD = (1 << 4), BPF_LIST_NODE = (1 << 5), + BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD, }; struct btf_field_kptr { @@ -576,6 +577,11 @@ enum bpf_type_flag { /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */ MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), + /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. + * Currently only valid for linked-list and rbtree nodes. + */ + NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index aa83de1fe755..cf1bb1cf4a7b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -43,6 +43,22 @@ enum bpf_reg_liveness { REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ }; +/* For every reg representing a map value or allocated object pointer, + * we consider the tuple of (ptr, id) for them to be unique in verifier + * context and conside them to not alias each other for the purposes of + * tracking lock state. + */ +struct bpf_active_lock { + /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, + * there's no active lock held, and other fields have no + * meaning. If non-NULL, it indicates that a lock is held and + * id member has the reg->id of the register which can be >= 0. + */ + void *ptr; + /* This will be reg->id */ + u32 id; +}; + struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; @@ -226,11 +242,6 @@ struct bpf_reference_state { * exiting a callback function. */ int callback_ref; - /* Mark the reference state to release the registers sharing the same id - * on bpf_spin_unlock (for nodes that we will lose ownership to but are - * safe to access inside the critical section). - */ - bool release_on_unlock; }; /* state of the program: @@ -331,21 +342,8 @@ struct bpf_verifier_state { u32 branches; u32 insn_idx; u32 curframe; - /* For every reg representing a map value or allocated object pointer, - * we consider the tuple of (ptr, id) for them to be unique in verifier - * context and conside them to not alias each other for the purposes of - * tracking lock state. - */ - struct { - /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, - * there's no active lock held, and other fields have no - * meaning. If non-NULL, it indicates that a lock is held and - * id member has the reg->id of the register which can be >= 0. - */ - void *ptr; - /* This will be reg->id */ - u32 id; - } active_lock; + + struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; -- cgit v1.2.3 From d6dd5bafaabf98a99a76227ab8dc9a89e76a198f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Jan 2023 12:41:56 +0530 Subject: PCI: endpoint: Use a separate lock for protecting epc->pci_epf list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EPC controller maintains a list of EPF drivers added to it. For protecting this list against the concurrent accesses, the epc->lock (used for protecting epc_ops) has been used so far. Since there were no users trying to use epc_ops and modify the pci_epf list simultaneously, this was not an issue. But with the addition of callback mechanism for passing the events, this will be a problem. Because the pci_epf list needs to be iterated first for getting hold of the EPF driver and then the relevant event specific callback needs to be called for the driver. If the same epc->lock is used, then it will result in a deadlock scenario. For instance, ... mutex_lock(&epc->lock); list_for_each_entry(epf, &epc->pci_epf, list) { epf->event_ops->core_init(epf); | |-> pci_epc_set_bar(); | |-> mutex_lock(&epc->lock) # DEADLOCK ... So to fix this issue, use a separate lock called "list_lock" for protecting the pci_epf list against the concurrent accesses. This lock will also be used by the callback mechanism. Link: https://lore.kernel.org/linux-pci/20230124071158.5503-4-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index a48778e1a4ee..fe729dfe509b 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -122,6 +122,7 @@ struct pci_epc_mem { * struct pci_epc - represents the PCI EPC device * @dev: PCI EPC device * @pci_epf: list of endpoint functions present in this EPC device + * list_lock: Mutex for protecting pci_epf list * @ops: function pointers for performing endpoint operations * @windows: array of address space of the endpoint controller * @mem: first window of the endpoint controller, which corresponds to @@ -139,6 +140,7 @@ struct pci_epc_mem { struct pci_epc { struct device dev; struct list_head pci_epf; + struct mutex list_lock; const struct pci_epc_ops *ops; struct pci_epc_mem **windows; struct pci_epc_mem *mem; -- cgit v1.2.3 From 838125b07e7706b1a9069079a73507fd3df244f7 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Jan 2023 12:41:57 +0530 Subject: PCI: endpoint: Use callback mechanism for passing events from EPC to EPF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using the notifiers for passing the events from EPC to EPF, let's introduce a callback based mechanism where the EPF drivers can populate relevant callbacks for EPC events they want to subscribe. The use of notifiers in kernel is not recommended if there is a real link between the sender and receiver, like in this case. Also, the existing atomic notifier forces the notification functions to be in atomic context while the caller may be in non-atomic context. For instance, the two in-kernel users of the notifiers, pcie-qcom and pcie-tegra194, both are calling the notifier functions in non-atomic context (from threaded IRQ handlers). This creates a sleeping in atomic context issue with the existing EPF_TEST driver that calls the EPC APIs that may sleep. For all these reasons, let's get rid of the notifier chains and use the simple callback mechanism for signalling the events from EPC to EPF drivers. This preserves the context of the caller and avoids the latency of going through a separate interface for triggering the notifications. As a first step of the transition, the core_init() callback is introduced in this commit, that'll replace the existing CORE_INIT notifier used for signalling the init complete event from EPC. During the occurrence of the event, EPC will go over the list of EPF drivers attached to it and will call the core_init() callback if available. Link: https://lore.kernel.org/linux-pci/20230124071158.5503-5-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 009a07147c61..fa629c191f00 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -18,7 +18,6 @@ struct pci_epf; enum pci_epc_interface_type; enum pci_notify_event { - CORE_INIT, LINK_UP, }; @@ -72,6 +71,14 @@ struct pci_epf_ops { struct config_group *group); }; +/** + * struct pci_epf_event_ops - Callbacks for capturing the EPC events + * @core_init: Callback for the EPC initialization complete event + */ +struct pci_epc_event_ops { + int (*core_init)(struct pci_epf *epf); +}; + /** * struct pci_epf_driver - represents the PCI EPF driver * @probe: ops to perform when a new EPF device has been bound to the EPF driver @@ -139,6 +146,7 @@ struct pci_epf_bar { * @is_vf: true - virtual function, false - physical function * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function + * @event_ops: Callbacks for capturing the EPC events */ struct pci_epf { struct device dev; @@ -168,6 +176,7 @@ struct pci_epf { unsigned int is_vf; unsigned long vfunction_num_map; struct list_head pci_vepf; + const struct pci_epc_event_ops *event_ops; }; /** -- cgit v1.2.3 From f5edd8715e2ea672e6119c3764041743761fb178 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Jan 2023 12:41:58 +0530 Subject: PCI: endpoint: Use link_up() callback in place of LINK_UP notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a part of the transition towards callback mechanism for signalling the events from EPC to EPF, let's use the link_up() callback in the place of the LINK_UP notifier. This also removes the notifier support completely from the PCI endpoint framework. Link: https://lore.kernel.org/linux-pci/20230124071158.5503-6-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 8 -------- include/linux/pci-epf.h | 8 ++------ 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index fe729dfe509b..301bb0e53707 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -135,7 +135,6 @@ struct pci_epc_mem { * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops * @function_num_map: bitmap to manage physical function number - * @notifier: used to notify EPF of any EPC events (like linkup) */ struct pci_epc { struct device dev; @@ -151,7 +150,6 @@ struct pci_epc { /* mutex to protect against concurrent access of EP controller */ struct mutex lock; unsigned long function_num_map; - struct atomic_notifier_head notifier; }; /** @@ -194,12 +192,6 @@ static inline void *epc_get_drvdata(struct pci_epc *epc) return dev_get_drvdata(&epc->dev); } -static inline int -pci_epc_register_notifier(struct pci_epc *epc, struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&epc->notifier, nb); -} - struct pci_epc * __devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct module *owner); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index fa629c191f00..a215dc8ce693 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -17,10 +17,6 @@ struct pci_epf; enum pci_epc_interface_type; -enum pci_notify_event { - LINK_UP, -}; - enum pci_barno { NO_BAR = -1, BAR_0, @@ -74,9 +70,11 @@ struct pci_epf_ops { /** * struct pci_epf_event_ops - Callbacks for capturing the EPC events * @core_init: Callback for the EPC initialization complete event + * @link_up: Callback for the EPC link up event */ struct pci_epc_event_ops { int (*core_init)(struct pci_epf *epf); + int (*link_up)(struct pci_epf *epf); }; /** @@ -134,7 +132,6 @@ struct pci_epf_bar { * @epf_pf: the physical EPF device to which this virtual EPF device is bound * @driver: the EPF driver to which this EPF device is bound * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc - * @nb: notifier block to notify EPF of any EPC events (like linkup) * @lock: mutex to protect pci_epf_ops * @sec_epc: the secondary EPC device to which this EPF device is bound * @sec_epc_list: to add pci_epf as list of PCI endpoint functions to secondary @@ -162,7 +159,6 @@ struct pci_epf { struct pci_epf *epf_pf; struct pci_epf_driver *driver; struct list_head list; - struct notifier_block nb; /* mutex to protect against concurrent access of pci_epf_ops */ struct mutex lock; -- cgit v1.2.3 From 80df83c2c57e75cb482ccf0c639ce84703ab41a2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 4 Feb 2023 00:53:35 +0100 Subject: mmc: core: add devm_mmc_alloc_host Add a device-managed version of mmc_alloc_host(). The argument order is reversed compared to mmc_alloc_host() because device-managed functions typically have the device argument first. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/6d8f9fdc-7c9e-8e4f-e6ef-5470b971c74e@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8fdd3cf971a3..812e6b583b25 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -527,6 +527,7 @@ struct mmc_host { struct device_node; struct mmc_host *mmc_alloc_host(int extra, struct device *); +struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra); int mmc_add_host(struct mmc_host *); void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); -- cgit v1.2.3 From 6aa3a920125e9f58891e2b5dc2efd4d0c1ff05a6 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:50 -0600 Subject: mm/hugetlb: convert isolate_hugetlb to folios Patch series "continue hugetlb folio conversion", v3. This series continues the conversion of core hugetlb functions to use folios. This series converts many helper funtions in the hugetlb fault path. This is in preparation for another series to convert the hugetlb fault code paths to operate on folios. This patch (of 8): Convert isolate_hugetlb() to take in a folio and convert its callers to pass a folio. Use page_folio() to convert the callers to use a folio is safe as isolate_hugetlb() operates on a head page. Link: https://lkml.kernel.org/r/20230113223057.173292-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20230113223057.173292-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a51e6daacac6..6e38a019f654 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -171,7 +171,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int isolate_hugetlb(struct page *page, struct list_head *list); +int isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); @@ -413,7 +413,7 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline int isolate_hugetlb(struct page *page, struct list_head *list) +static inline int isolate_hugetlb(struct folio *folio, struct list_head *list) { return -EBUSY; } -- cgit v1.2.3 From ff7d853b031302376a0d3640fa1c463d94079637 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:54 -0600 Subject: mm/hugetlb: increase use of folios in alloc_huge_page() Change hugetlb_cgroup_commit_charge{,_rsvd}(), dequeue_huge_page_vma() and alloc_buddy_huge_page_with_mpol() to use folios so alloc_huge_page() is cleaned by operating on folios until its return. Link: https://lkml.kernel.org/r/20230113223057.173292-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index f706626a8063..3d82d91f49ac 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -141,10 +141,10 @@ extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page); + struct folio *folio); extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page); + struct folio *folio); extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, struct folio *folio); extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, @@ -230,14 +230,14 @@ static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx, static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page) + struct folio *folio) { } static inline void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page) + struct folio *folio) { } -- cgit v1.2.3 From e37d3e838d9078538f920957d1e89682b6764977 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:55 -0600 Subject: mm/hugetlb: convert alloc_migrate_huge_page to folios Change alloc_huge_page_nodemask() to alloc_hugetlb_folio_nodemask() and alloc_migrate_huge_page() to alloc_migrate_hugetlb_folio(). Both functions now return a folio rather than a page. Link: https://lkml.kernel.org/r/20230113223057.173292-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6e38a019f654..2375c62c61a4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -719,7 +719,7 @@ struct huge_bootmem_page { int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, +struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); @@ -1040,8 +1040,8 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma, return NULL; } -static inline struct page * -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, +static inline struct folio * +alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask) { return NULL; -- cgit v1.2.3 From ea8e72f4116a995c2aba3fb738ac372c4115375a Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:32 -0800 Subject: mm/hugetlb: convert putback_active_hugepage to take in a folio Convert putback_active_hugepage() to folio_putback_active_hugetlb(), this removes one user of the Huge Page macros which take in a page. The callers in migrate.c are also cleaned up by being able to directly use the src and dst folio variables. Link: https://lkml.kernel.org/r/20230125170537.96973-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2375c62c61a4..067906c5778e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,7 +175,7 @@ int isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -void putback_active_hugepage(struct page *page); +void folio_putback_active_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); @@ -429,7 +429,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, return 0; } -static inline void putback_active_hugepage(struct page *page) +static inline void folio_putback_active_hugetlb(struct folio *folio) { } -- cgit v1.2.3 From d0ce0e47b323a8d7fb5dc3314ce56afa650ade2d Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:33 -0800 Subject: mm/hugetlb: convert hugetlb fault paths to use alloc_hugetlb_folio() Change alloc_huge_page() to alloc_hugetlb_folio() by changing all callers to handle the now folio return type of the function. In this conversion, alloc_huge_page_vma() is also changed to alloc_hugetlb_folio_vma() and hugepage_add_new_anon_rmap() is changed to take in a folio directly. Many additions of '&folio->page' are cleaned up in subsequent patches. hugetlbfs_fallocate() is also refactored to use the RCU + page_cache_next_miss() API. Link: https://lkml.kernel.org/r/20230125170537.96973-5-sidhartha.kumar@oracle.com Suggested-by: Mike Kravetz Reported-by: kernel test robot Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 8 ++++---- include/linux/rmap.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 067906c5778e..6408f85e5754 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -717,11 +717,11 @@ struct huge_bootmem_page { }; int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); -struct page *alloc_huge_page(struct vm_area_struct *vma, +struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); -struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, +struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -1033,7 +1033,7 @@ static inline int isolate_or_dissolve_huge_page(struct page *page, return -ENOMEM; } -static inline struct page *alloc_huge_page(struct vm_area_struct *vma, +static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { @@ -1047,7 +1047,7 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, return NULL; } -static inline struct page *alloc_huge_page_vma(struct hstate *h, +static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address) { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a6bd1f0a183d..a4570da03e58 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -203,7 +203,7 @@ void page_remove_rmap(struct page *, struct vm_area_struct *, void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); -void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, +void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); static inline void __page_dup_rmap(struct page *page, bool compound) -- cgit v1.2.3 From d2d7bb44bfbd29200426ba17741550d36e081f91 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:34 -0800 Subject: mm/hugetlb: convert restore_reserve_on_error to take in a folio Every caller of restore_reserve_on_error() is now passing in &folio->page, change the function to take in a folio directly and clean up the call sites. Link: https://lkml.kernel.org/r/20230125170537.96973-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6408f85e5754..20ceaaea1697 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -726,7 +726,7 @@ struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *v int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, - unsigned long address, struct page *page); + unsigned long address, struct folio *folio); /* arch callback */ int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); -- cgit v1.2.3 From 9b91c0e277a3dbb165c2e4301be7a231dc2f76f7 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:35 -0800 Subject: mm/hugetlb: convert hugetlb_add_to_page_cache to take in a folio Every caller of hugetlb_add_to_page_cache() is now passing in &folio->page, change the function to take in a folio directly and clean up the call sites. Link: https://lkml.kernel.org/r/20230125170537.96973-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 20ceaaea1697..df6dd624ccfe 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -723,7 +723,7 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, +int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct folio *folio); -- cgit v1.2.3 From fa4e3f5ffa5e6e22f751d289c9afa502dda30b8d Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 30 Jan 2023 12:18:28 -0800 Subject: mm: add folio_estimated_sharers() Patch series "Convert various mempolicy.c functions to use folios", v4. This patch series converts migrate_page_add() and queue_pages_required() to migrate_folio_add() and queue_page_required(). It also converts the callers of the functions to use folios as well, and introduces a helper function to estimate the number of sharers of a folio. This patch (of 6): folio_estimated_sharers() takes in a folio and returns the precise number of times the first subpage of the folio is mapped. This function aims to provide an estimate for the number of sharers of a folio. This is necessary for folio conversions where we care about the number of processes that share a folio, but don't necessarily want to check every single page within that folio. This is in contrast to folio_mapcount() which calculates the total number of the times a folio and all its subpages are mapped. Link: https://lkml.kernel.org/r/20230130201833.27042-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20230130201833.27042-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Yin Fengwei Acked-by: David Hildenbrand Cc: Jane Chu Signed-off-by: Andrew Morton --- include/linux/mm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9454b7eb055b..89c118ad4a44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1916,6 +1916,24 @@ static inline size_t folio_size(struct folio *folio) return PAGE_SIZE << folio_order(folio); } +/** + * folio_estimated_sharers - Estimate the number of sharers of a folio. + * @folio: The folio. + * + * folio_estimated_sharers() aims to serve as a function to efficiently + * estimate the number of processes sharing a folio. This is done by + * looking at the precise mapcount of the first subpage in the folio, and + * assuming the other subpages are the same. This may not be true for large + * folios. If you want exact mapcounts for exact calculations, look at + * page_mapcount() or folio_total_mapcount(). + * + * Return: The estimated number of processes sharing a folio. + */ +static inline int folio_estimated_sharers(struct folio *folio) +{ + return page_mapcount(folio_page(folio, 0)); +} + #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE static inline int arch_make_page_accessible(struct page *page) { -- cgit v1.2.3 From 3c1ea2c729ef8ef07bcb80d01ab2ead45b3406dd Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 30 Jan 2023 13:43:49 -0800 Subject: mm: add folio_get_nontail_page() Patch series "Convert a couple migrate functions to use folios", v2. This patchset introduces folio_movable_ops() and converts 3 functions in mm/migrate.c to use folios. It also introduces folio_get_nontail_page() for folio conversions which may want to distinguish between head and tail pages. This patch (of 4): folio_get_nontail_page() returns the folio associated with a head page. This is necessary for folio conversions where the behavior of that function differs between head pages and tail pages. Link: https://lkml.kernel.org/r/20230130214352.40538-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20230130214352.40538-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 89c118ad4a44..2992a2d55aee 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -892,6 +892,13 @@ static inline bool get_page_unless_zero(struct page *page) return page_ref_add_unless(page, 1, 0); } +static inline struct folio *folio_get_nontail_page(struct page *page) +{ + if (unlikely(!get_page_unless_zero(page))) + return NULL; + return (struct folio *)page; +} + extern int page_is_ram(unsigned long pfn); enum { -- cgit v1.2.3 From da707a6d184a8a6ef0b756c3ba49888fec223793 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 30 Jan 2023 13:43:50 -0800 Subject: mm/migrate: add folio_movable_ops() folio_movable_ops() does the same as page_movable_ops() except uses folios instead of pages. This function will help make folio conversions in migrate.c more readable. Link: https://lkml.kernel.org/r/20230130214352.40538-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/migrate.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3ef77f52a4f0..bdff950a8bb4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -122,6 +122,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *folio_movable_ops(struct folio *folio) +{ + VM_BUG_ON(!__folio_test_movable(folio)); + + return (const struct movable_operations *) + ((unsigned long)folio->mapping - PAGE_MAPPING_MOVABLE); +} + static inline const struct movable_operations *page_movable_ops(struct page *page) { -- cgit v1.2.3 From a2b9b123ccac913e9f9b80337d687a2fe786a634 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 7 Feb 2023 18:24:19 +0800 Subject: PCI: Add ACS quirk for Wangxun NICs Wangxun has verified there is no peer-to-peer between functions for the below selection of SFxxx, RP1000 and RP2000 NICS. They may be multi-function devices, but the hardware does not advertise ACS capability. Add an ACS quirk for these devices so the functions can be in independent IOMMU groups. Link: https://lore.kernel.org/r/20230207102419.44326-1-mengyuanlou@net-swift.com Signed-off-by: Mengyuan Lou Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b362d90eb9b0..bc8f484cdcf3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3012,6 +3012,8 @@ #define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 +#define PCI_VENDOR_ID_WANGXUN 0x8088 + #define PCI_VENDOR_ID_SCALEMP 0x8686 #define PCI_DEVICE_ID_SCALEMP_VSMP_CTL 0x1010 -- cgit v1.2.3 From f57aec443c24d2e8e1f3b5b4856aea12ddda4254 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Feb 2023 17:01:05 -0800 Subject: cxl/pmem: Fix nvdimm registration races A loop of the form: while true; do modprobe cxl_pci; modprobe -r cxl_pci; done ...fails with the following crash signature: BUG: kernel NULL pointer dereference, address: 0000000000000040 [..] RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core] [..] Call Trace: cxl_pmem_ctl+0x121/0x240 [cxl_pmem] nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm] nd_label_data_init+0x135/0x7e0 [libnvdimm] nvdimm_probe+0xd6/0x1c0 [libnvdimm] nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __device_attach_driver+0x85/0x110 bus_for_each_drv+0x7d/0xc0 __device_attach+0xb4/0x1e0 bus_probe_device+0x9f/0xc0 device_add+0x445/0x9c0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x30/0x130 ...namely that the bottom half of async nvdimm device registration runs after the CXL has already torn down the context that cxl_pmem_ctl() needs. Unlike the ACPI NFIT case that benefits from launching multiple nvdimm device registrations in parallel from those listed in the table, CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a synchronous registration path to preclude this scenario. Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices") Cc: Reported-by: Dave Jiang Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af38252ad704..e772aae71843 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,6 +41,9 @@ enum { */ NDD_INCOHERENT = 7, + /* dimm provider wants synchronous registration by __nvdimm_create() */ + NDD_REGISTER_SYNC = 8, + /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, ND_CMD_MAX_ELEM = 5, -- cgit v1.2.3 From 9c395c1b99bd23f74bc628fa000480c49593d17f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 13 Feb 2023 16:40:10 -0800 Subject: bpf: Add basic bpf_rb_{root,node} support This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new types, and adds bpf_rb_root_free function for freeing bpf_rb_root in map_values. structs bpf_rb_root and bpf_rb_node are opaque types meant to obscure structs rb_root_cached rb_node, respectively. btf_struct_access will prevent BPF programs from touching these special fields automatically now that they're recognized. btf_check_and_fixup_fields now groups list_head and rb_root together as "graph root" fields and {list,rb}_node as "graph node", and does same ownership cycle checking as before. Note that this function does _not_ prevent ownership type mixups (e.g. rb_root owning list_node) - that's handled by btf_parse_graph_root. After this patch, a bpf program can have a struct bpf_rb_root in a map_value, but not add anything to nor do anything useful with it. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8b5d0b4c4ada..be34f7deb6c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -181,7 +181,10 @@ enum btf_field_type { BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, BPF_LIST_HEAD = (1 << 4), BPF_LIST_NODE = (1 << 5), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD, + BPF_RB_ROOT = (1 << 6), + BPF_RB_NODE = (1 << 7), + BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | + BPF_RB_NODE | BPF_RB_ROOT, }; struct btf_field_kptr { @@ -285,6 +288,10 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_list_head"; case BPF_LIST_NODE: return "bpf_list_node"; + case BPF_RB_ROOT: + return "bpf_rb_root"; + case BPF_RB_NODE: + return "bpf_rb_node"; default: WARN_ON_ONCE(1); return "unknown"; @@ -305,6 +312,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_list_head); case BPF_LIST_NODE: return sizeof(struct bpf_list_node); + case BPF_RB_ROOT: + return sizeof(struct bpf_rb_root); + case BPF_RB_NODE: + return sizeof(struct bpf_rb_node); default: WARN_ON_ONCE(1); return 0; @@ -325,6 +336,10 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_list_head); case BPF_LIST_NODE: return __alignof__(struct bpf_list_node); + case BPF_RB_ROOT: + return __alignof__(struct bpf_rb_root); + case BPF_RB_NODE: + return __alignof__(struct bpf_rb_node); default: WARN_ON_ONCE(1); return 0; @@ -435,6 +450,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, void bpf_timer_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); +void bpf_rb_root_free(const struct btf_field *field, void *rb_root, + struct bpf_spin_lock *spin_lock); + int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); -- cgit v1.2.3 From 1fb2d41501f38192d8a19da585cd441cf8845697 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Feb 2023 18:47:06 +0000 Subject: net: add pskb_may_pull_reason() helper pskb_may_pull() can fail for two different reasons. Provide pskb_may_pull_reason() helper to distinguish between these reasons. It returns: SKB_NOT_DROPPED_YET : Success SKB_DROP_REASON_PKT_TOO_SMALL : packet too small SKB_DROP_REASON_NOMEM : skb->head could not be resized Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 47ab28a37f2f..d5602b15c714 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2631,13 +2631,24 @@ void *skb_pull_data(struct sk_buff *skb, size_t len); void *__pskb_pull_tail(struct sk_buff *skb, int delta); -static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) +static inline enum skb_drop_reason +pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { if (likely(len <= skb_headlen(skb))) - return true; + return SKB_NOT_DROPPED_YET; + if (unlikely(len > skb->len)) - return false; - return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; + return SKB_DROP_REASON_PKT_TOO_SMALL; + + if (unlikely(!__pskb_pull_tail(skb, len - skb_headlen(skb)))) + return SKB_DROP_REASON_NOMEM; + + return SKB_NOT_DROPPED_YET; +} + +static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) +{ + return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) -- cgit v1.2.3 From a292f2534fb2caec7f3ad32045338f8feac28112 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 9 Feb 2023 20:36:13 +0800 Subject: crypto: hisilicon/qm - remove some unused defines 1. Remove some macros define since it is not used. 2. Remove enum QM_HW_UNKNOWN since it is not used. 3. Remove unused member 'is_frozen' in 'hisi_qm' structure. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index be3aedaa96dc..df5be7e6db04 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -122,7 +122,6 @@ enum qp_state { }; enum qm_hw_ver { - QM_HW_UNKNOWN = -1, QM_HW_V1 = 0x20, QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, @@ -332,7 +331,6 @@ struct hisi_qm { const char *algs; bool use_sva; - bool is_frozen; resource_size_t phys_base; resource_size_t db_phys_base; -- cgit v1.2.3 From 9b4eb8f8b8eccc313663a99904dcfe2660c41b6c Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 9 Feb 2023 20:36:16 +0800 Subject: crypto: hisilicon/qm - update comments to match function The return values of some functions have been modified, but the comments have not been modified together. The comments must be updated to be consistent with the functions. Also move comments over the codes instead of right place to ensure consistent coding styles. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index df5be7e6db04..a59fbffa238f 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -308,7 +308,8 @@ struct hisi_qm { const struct hisi_qm_err_ini *err_ini; struct hisi_qm_err_info err_info; struct hisi_qm_err_status err_status; - unsigned long misc_ctl; /* driver removing and reset sched */ + /* driver removing and reset sched */ + unsigned long misc_ctl; /* Device capability bit */ unsigned long caps; -- cgit v1.2.3 From 2b68d659a704abaa61fb3255776b5483bdc4eb35 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 12 Feb 2023 15:25:20 +0200 Subject: net/mlx5e: TC, support per action stats Extend the action stats callback implementation to update stats for actions that are associated with hw counters. Note that the callback may be called from tc action utility or from tc flower. Both apis expect the driver to return the stats difference from the last update. As such, query the raw counter value and maintain the diff from the last api call in the tc layer, instead of the fs_core layer. Signed-off-by: Oz Shlomo Reviewed-by: Roi Dayan Signed-off-by: Paolo Abeni --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index ba6958b49a8e..90a2fe5839fa 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -296,6 +296,8 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); +void mlx5_fc_query_cached_raw(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse); int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, u64 *packets, u64 *bytes); u32 mlx5_fc_id(struct mlx5_fc *counter); -- cgit v1.2.3 From 7464145862d606cf9835f24080341a397cbc0669 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 14 Feb 2023 09:40:14 +0100 Subject: regmap: Reorder fields in 'struct regmap_bus' to save some memory Group some bool variables to reduce hole and avoid padding. On x86_64, this shrinks the size from 136 to 128 bytes. As an example: $ size drivers/base/regmap/regmap-fsi.o (Before) text data bss dec hex filename 4837 136 0 4973 136d drivers/base/regmap/regmap-fsi.o $ size drivers/base/regmap/regmap-fsi.o (After) text data bss dec hex filename 4701 136 0 4837 12e5 drivers/base/regmap/regmap-fsi.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/077ca39622c8870a3ea932298a9cec34f7a8295a.1676363976.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 029b9e09d3ca..f26432dc02ef 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -520,6 +520,7 @@ typedef void (*regmap_hw_free_context)(void *context); * to perform locking. This field is ignored if custom lock/unlock * functions are used (see fields lock/unlock of * struct regmap_config). + * @free_on_exit: kfree this on exit of regmap * @write: Write operation. * @gather_write: Write operation with split register/value, return -ENOTSUPP * if not implemented on a given device. @@ -548,10 +549,10 @@ typedef void (*regmap_hw_free_context)(void *context); * DEFAULT, BIG is assumed. * @max_raw_read: Max raw read size that can be used on the bus. * @max_raw_write: Max raw write size that can be used on the bus. - * @free_on_exit: kfree this on exit of regmap */ struct regmap_bus { bool fast_io; + bool free_on_exit; regmap_hw_write write; regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; @@ -568,7 +569,6 @@ struct regmap_bus { enum regmap_endian val_format_endian_default; size_t max_raw_read; size_t max_raw_write; - bool free_on_exit; }; /* -- cgit v1.2.3 From 9d77522b45246c3dc5950b9641aea49ce3c973d7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 14 Feb 2023 11:34:50 +0100 Subject: spi: Reorder fields in 'struct spi_transfer' Group some variables based on their sizes to reduce hole and avoid padding. On x86_64, this shrinks the size from 144 to 128 bytes. Turn 'timestamped' into a bitfield so that it can be easily merged with some other bifields and move 'error'. This should have no real impact on memory allocation because 'struct spi_transfer' is mostly used on stack, but it can save a few cycles when the structure is initialized or copied. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/93a051da85a895bc6003aedfb00a13e1c2fc6338.1676370870.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 88cb2a1390ce..a08c20bd833b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1022,6 +1022,9 @@ struct spi_transfer { void *rx_buf; unsigned len; +#define SPI_TRANS_FAIL_NO_START BIT(0) + u16 error; + dma_addr_t tx_dma; dma_addr_t rx_dma; struct sg_table tx_sg; @@ -1032,6 +1035,7 @@ struct spi_transfer { unsigned cs_change:1; unsigned tx_nbits:3; unsigned rx_nbits:3; + unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2bits transfer */ #define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ @@ -1048,12 +1052,7 @@ struct spi_transfer { struct ptp_system_timestamp *ptp_sts; - bool timestamped; - struct list_head transfer_list; - -#define SPI_TRANS_FAIL_NO_START BIT(0) - u16 error; }; /** -- cgit v1.2.3 From ab9fdd41d970c38ddc0fd59e5f8f37e8d966d454 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 13 Feb 2023 07:52:11 -0800 Subject: rpmsg: glink: smem: Wrap driver context The Glink SMEM driver allocates a struct device and hangs two devres-allocated pipe objects thereon. To facilitate the move of interrupt and mailbox handling to the driver, introduce a wrapper object capturing the device, glink reference and remote processor id. The type of the remoteproc reference is updated, as these are specifically targeting the SMEM implementation. Signed-off-by: Bjorn Andersson Reviewed-by: Chris Lew Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230213155215.1237059-3-quic_bjorande@quicinc.com --- include/linux/rpmsg/qcom_glink.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg/qcom_glink.h b/include/linux/rpmsg/qcom_glink.h index 22fc3a69b683..bfbd48f435fa 100644 --- a/include/linux/rpmsg/qcom_glink.h +++ b/include/linux/rpmsg/qcom_glink.h @@ -5,7 +5,7 @@ #include -struct qcom_glink; +struct qcom_glink_smem; #if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK) void qcom_glink_ssr_notify(const char *ssr_name); @@ -15,20 +15,20 @@ static inline void qcom_glink_ssr_notify(const char *ssr_name) {} #if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK_SMEM) -struct qcom_glink *qcom_glink_smem_register(struct device *parent, - struct device_node *node); -void qcom_glink_smem_unregister(struct qcom_glink *glink); +struct qcom_glink_smem *qcom_glink_smem_register(struct device *parent, + struct device_node *node); +void qcom_glink_smem_unregister(struct qcom_glink_smem *glink); #else -static inline struct qcom_glink * +static inline struct qcom_glink_smem * qcom_glink_smem_register(struct device *parent, struct device_node *node) { return NULL; } -static inline void qcom_glink_smem_unregister(struct qcom_glink *glink) {} +static inline void qcom_glink_smem_unregister(struct qcom_glink_smem *glink) {} #endif #endif -- cgit v1.2.3 From 1dd46599f83ac5323a175d32955b1270e95cd11b Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 14 Feb 2023 05:59:28 -0800 Subject: spi: xilinx: add force_irq for QSPI mode Xilinx PG158 page 80 [1] states that master transaction inhibit bit must be set to properly setup the transaction in QSPI mode. Add the force_irq flag to follow this sequence. [1] https://docs.xilinx.com/r/en-US/pg153-axi-quad-spi/Dual/Quad-SPI-Mode-Transactions Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20230214135928.1253205-1-vadfed@meta.com Signed-off-by: Mark Brown --- include/linux/spi/xilinx_spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/xilinx_spi.h b/include/linux/spi/xilinx_spi.h index c15d69d28e68..3934ce789d87 100644 --- a/include/linux/spi/xilinx_spi.h +++ b/include/linux/spi/xilinx_spi.h @@ -15,6 +15,7 @@ struct xspi_platform_data { u8 bits_per_word; struct spi_board_info *devices; u8 num_devices; + bool force_irq; }; #endif /* __LINUX_SPI_XILINX_SPI_H */ -- cgit v1.2.3 From 35c5db0ec49f073e6a2d5236b5fcfb0a134a215a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:35 -0500 Subject: NFS: Add basic functionality for tracking folios in struct nfs_page Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index ba7e2e4b0926..d2ddc9a594c5 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -25,6 +25,7 @@ enum { PG_BUSY = 0, /* nfs_{un}lock_request */ PG_MAPPED, /* page private set for buffered io */ + PG_FOLIO, /* Tracking a folio (unset for O_DIRECT) */ PG_CLEAN, /* write succeeded */ PG_COMMIT_TO_DS, /* used by pnfs layouts */ PG_INODE_REF, /* extra ref held by inode when in writeback */ @@ -41,7 +42,10 @@ enum { struct nfs_inode; struct nfs_page { struct list_head wb_list; /* Defines state of page: */ - struct page *wb_page; /* page to read in/write out */ + union { + struct page *wb_page; /* page to read in/write out */ + struct folio *wb_folio; + }; struct nfs_lock_context *wb_lock_context; /* lock context info */ pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_MASK */ @@ -153,6 +157,38 @@ extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); +/** + * nfs_page_to_folio - Retrieve a struct folio for the request + * @req: pointer to a struct nfs_page + * + * If a folio was assigned to @req, then return it, otherwise return NULL. + */ +static inline struct folio *nfs_page_to_folio(const struct nfs_page *req) +{ + if (test_bit(PG_FOLIO, &req->wb_flags)) + return req->wb_folio; + return NULL; +} + +/** + * nfs_page_to_page - Retrieve a struct page for the request + * @req: pointer to a struct nfs_page + * @pgbase: folio byte offset + * + * Return the page containing the byte that is at offset @pgbase relative + * to the start of the folio. + * Note: The request starts at offset @req->wb_pgbase. + */ +static inline struct page *nfs_page_to_page(const struct nfs_page *req, + size_t pgbase) +{ + struct folio *folio = nfs_page_to_folio(req); + + if (folio == NULL) + return req->wb_page; + return folio_page(folio, pgbase >> PAGE_SHIFT); +} + /* * Lock the page of an asynchronous request */ -- cgit v1.2.3 From 8e0bdc7021f713fdf3b985cda3ce715e41b06698 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:37 -0500 Subject: NFS: Fix nfs_coalesce_size() to work with folios Use the helper folio_size() where appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d2ddc9a594c5..192071a6e5f6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -189,6 +189,21 @@ static inline struct page *nfs_page_to_page(const struct nfs_page *req, return folio_page(folio, pgbase >> PAGE_SHIFT); } +/** + * nfs_page_max_length - Retrieve the maximum possible length for a request + * @req: pointer to a struct nfs_page + * + * Returns the maximum possible length of a request + */ +static inline size_t nfs_page_max_length(const struct nfs_page *req) +{ + struct folio *folio = nfs_page_to_folio(req); + + if (folio == NULL) + return PAGE_SIZE; + return folio_size(folio); +} + /* * Lock the page of an asynchronous request */ -- cgit v1.2.3 From 6dd85e83f3f182b56770f8bb6dbed1f0dafb9117 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:38 -0500 Subject: NFS: Add a helper to convert a struct nfs_page into an inode Replace all the open coded calls to page_file_mapping(req->wb_page)->host. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 192071a6e5f6..b0b03ec4a209 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -189,6 +189,19 @@ static inline struct page *nfs_page_to_page(const struct nfs_page *req, return folio_page(folio, pgbase >> PAGE_SHIFT); } +/** + * nfs_page_to_inode - Retrieve an inode for the request + * @req: pointer to a struct nfs_page + */ +static inline struct inode *nfs_page_to_inode(const struct nfs_page *req) +{ + struct folio *folio = nfs_page_to_folio(req); + + if (folio == NULL) + return page_file_mapping(req->wb_page)->host; + return folio_file_mapping(folio)->host; +} + /** * nfs_page_max_length - Retrieve the maximum possible length for a request * @req: pointer to a struct nfs_page -- cgit v1.2.3 From 4b27232a6e064f3d779cfa76cd251d6023949d22 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:40 -0500 Subject: NFS: Add a helper nfs_wb_folio() ...and use it in nfs_launder_folio(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d92fdfd2444c..66b5de42f6b8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -578,6 +578,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); +extern int nfs_wb_folio(struct inode *inode, struct folio *folio); extern int nfs_wb_page(struct inode *inode, struct page *page); int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); -- cgit v1.2.3 From ab75bff1140733f1b43e81f055acd7d27af7ac05 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:41 -0500 Subject: NFS: Convert buffered reads to use folios Perform a largely mechanical conversion of references to struct page and page-specific functions to use the folio equivalents. Note that the fscache functionality remains untouched. Instead we just pass in the folio page. This should be OK, as long as we use order 0 folios together with fscache. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index b0b03ec4a209..3c71493d5cc3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -125,6 +125,10 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct page *page, unsigned int offset, unsigned int count); +extern struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, + struct folio *folio, + unsigned int offset, + unsigned int count); extern void nfs_release_request(struct nfs_page *); -- cgit v1.2.3 From 0c493b5cf16e28d761b6e77c7c32aa0e7af70813 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:43 -0500 Subject: NFS: Convert buffered writes to use folios Mostly mechanical conversion of struct page and functions into struct folio equivalents. The lack of support for folios in write_cache_pages(), means we still only support order 0 folio allocations. However the rest of the writeback code should now be ready for order n > 0. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 66b5de42f6b8..a61bfd52d551 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -569,8 +569,9 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); extern int nfs_congestion_kb; extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); -extern int nfs_flush_incompatible(struct file *file, struct page *page); -extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); +extern int nfs_flush_incompatible(struct file *file, struct folio *folio); +extern int nfs_update_folio(struct file *file, struct folio *folio, + unsigned int offset, unsigned int count); /* * Try to write back everything synchronously (but check the -- cgit v1.2.3 From 4cbf76948c457f0beb9f184ebb21341c8235846a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:44 -0500 Subject: NFS: Remove unused function nfs_wb_page() Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a61bfd52d551..45c44211e50e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -580,7 +580,6 @@ extern int nfs_update_folio(struct file *file, struct folio *folio, extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_folio(struct inode *inode, struct folio *folio); -extern int nfs_wb_page(struct inode *inode, struct page *page); int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); -- cgit v1.2.3 From 70e9db69f927bb378db9aaa807cc83ae550779a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:47 -0500 Subject: NFS: Clean up O_DIRECT request allocation Rather than adjusting the index+offset after the call to nfs_create_request(), add a function nfs_page_create_from_page() that takes an offset. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3c71493d5cc3..a2f1ca657623 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -121,10 +121,11 @@ struct nfs_pageio_descriptor { #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, - struct page *page, - unsigned int offset, - unsigned int count); +extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, + struct page *page, + unsigned int pgbase, + loff_t offset, + unsigned int count); extern struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, struct folio *folio, unsigned int offset, -- cgit v1.2.3 From 3bd940030752a33ff665eefdd74a1cdb74a4f9b0 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 25 Jan 2023 21:00:44 +0100 Subject: dm: add missing SPDX-License-Indentifiers 'GPL-2.0-only' is used instead of 'GPL-2.0' because SPDX has deprecated its use. Suggested-by: John Wiele Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 + include/linux/dm-bufio.h | 1 + include/linux/dm-dirty-log.h | 1 + include/linux/dm-io.h | 1 + include/linux/dm-kcopyd.h | 1 + include/linux/dm-region-hash.h | 1 + 6 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 04c6acf7faaa..425fa9be648c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2001 Sistina Software (UK) Limited. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 15d9e15ca830..798379b8597a 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2009-2011 Red Hat, Inc. * diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index 7084503c3405..c12d40e332c1 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003 Sistina Software * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 8e1c4ab5df04..6846019da2a5 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003 Sistina Software * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index c1707ee5b540..6b4ece85a2a0 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2001 - 2003 Sistina Software * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h index 9e2a7a401df5..095541c59123 100644 --- a/include/linux/dm-region-hash.h +++ b/include/linux/dm-region-hash.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003 Sistina Software Limited. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. -- cgit v1.2.3 From 86a3238c7b9b759cb864f4f768ab2e24687dc0e6 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 25 Jan 2023 21:14:58 +0100 Subject: dm: change "unsigned" to "unsigned int" Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 38 +++++++++++++++++++------------------- include/linux/dm-bufio.h | 12 ++++++------ include/linux/dm-dirty-log.h | 6 +++--- include/linux/dm-io.h | 8 ++++---- include/linux/dm-kcopyd.h | 22 +++++++++++----------- include/linux/dm-region-hash.h | 2 +- 6 files changed, 44 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 425fa9be648c..9881e772b68c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -88,10 +88,10 @@ typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - unsigned status_flags, char *result, unsigned maxlen); + unsigned int status_flags, char *result, unsigned int maxlen); -typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, - char *result, unsigned maxlen); +typedef int (*dm_message_fn) (struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned int maxlen); typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); @@ -188,7 +188,7 @@ struct target_type { uint64_t features; const char *name; struct module *module; - unsigned version[3]; + unsigned int version[3]; dm_ctr_fn ctr; dm_dtr_fn dtr; dm_map_fn map; @@ -314,31 +314,31 @@ struct dm_target { * It is a responsibility of the target driver to remap these bios * to the real underlying devices. */ - unsigned num_flush_bios; + unsigned int num_flush_bios; /* * The number of discard bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_discard_bios; + unsigned int num_discard_bios; /* * The number of secure erase bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_secure_erase_bios; + unsigned int num_secure_erase_bios; /* * The number of WRITE ZEROES bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_write_zeroes_bios; + unsigned int num_write_zeroes_bios; /* * The minimum number of extra bytes allocated in each io for the * target to use. */ - unsigned per_io_data_size; + unsigned int per_io_data_size; /* target specific data */ void *private; @@ -384,7 +384,7 @@ struct dm_target { void *dm_per_bio_data(struct bio *bio, size_t data_size); struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size); -unsigned dm_bio_get_target_bio_nr(const struct bio *bio); +unsigned int dm_bio_get_target_bio_nr(const struct bio *bio); u64 dm_start_time_ns_from_clone(struct bio *bio); @@ -395,7 +395,7 @@ void dm_unregister_target(struct target_type *t); * Target argument parsing. */ struct dm_arg_set { - unsigned argc; + unsigned int argc; char **argv; }; @@ -404,8 +404,8 @@ struct dm_arg_set { * the error message to use if the number is found to be outside that range. */ struct dm_arg { - unsigned min; - unsigned max; + unsigned int min; + unsigned int max; char *error; }; @@ -414,7 +414,7 @@ struct dm_arg { * returning -EINVAL and setting *error. */ int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error); + unsigned int *value, char **error); /* * Process the next argument as the start of a group containing between @@ -422,7 +422,7 @@ int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, * *num_args or, if invalid, return -EINVAL and set *error. */ int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *num_args, char **error); + unsigned int *num_args, char **error); /* * Return the current argument and shift to the next. @@ -432,7 +432,7 @@ const char *dm_shift_arg(struct dm_arg_set *as); /* * Move through num_args arguments. */ -void dm_consume_args(struct dm_arg_set *as, unsigned num_args); +void dm_consume_args(struct dm_arg_set *as, unsigned int num_args); /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. @@ -462,7 +462,7 @@ void *dm_get_mdptr(struct mapped_device *md); /* * A device can still be used while suspended, but I/O is deferred. */ -int dm_suspend(struct mapped_device *md, unsigned suspend_flags); +int dm_suspend(struct mapped_device *md, unsigned int suspend_flags); int dm_resume(struct mapped_device *md); /* @@ -482,7 +482,7 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); -void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); union map_info *dm_get_rq_mapinfo(struct request *rq); @@ -526,7 +526,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); * First create an empty table. */ int dm_table_create(struct dm_table **result, fmode_t mode, - unsigned num_targets, struct mapped_device *md); + unsigned int num_targets, struct mapped_device *md); /* * Then call this once for each target. diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 798379b8597a..2056743aaaaa 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -27,8 +27,8 @@ struct dm_buffer; * Create a buffered IO cache on a given device */ struct dm_bufio_client * -dm_bufio_client_create(struct block_device *bdev, unsigned block_size, - unsigned reserved_buffers, unsigned aux_size, +dm_bufio_client_create(struct block_device *bdev, unsigned int block_size, + unsigned int reserved_buffers, unsigned int aux_size, void (*alloc_callback)(struct dm_buffer *), void (*write_callback)(struct dm_buffer *), unsigned int flags); @@ -82,7 +82,7 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, * I/O to finish. */ void dm_bufio_prefetch(struct dm_bufio_client *c, - sector_t block, unsigned n_blocks); + sector_t block, unsigned int n_blocks); /* * Release a reference obtained with dm_bufio_{read,get,new}. The data @@ -107,7 +107,7 @@ void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); * write the specified part of the buffer or it may write a larger superset. */ void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, - unsigned start, unsigned end); + unsigned int start, unsigned int end); /* * Initiate writing of dirty buffers, without waiting for completion. @@ -153,9 +153,9 @@ void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t /* * Set the minimum number of buffers before cleanup happens. */ -void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); +void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned int n); -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); +unsigned int dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index c12d40e332c1..a3eb7490d205 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -34,7 +34,7 @@ struct dm_dirty_log_type { struct list_head list; int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, - unsigned argc, char **argv); + unsigned int argc, char **argv); void (*dtr)(struct dm_dirty_log *log); /* @@ -117,7 +117,7 @@ struct dm_dirty_log_type { * Support function for mirror status requests. */ int (*status)(struct dm_dirty_log *log, status_type_t status_type, - char *result, unsigned maxlen); + char *result, unsigned int maxlen); /* * is_remote_recovering is necessary for cluster mirroring. It provides @@ -140,7 +140,7 @@ int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type); struct dm_dirty_log *dm_dirty_log_create(const char *type_name, struct dm_target *ti, int (*flush_callback_fn)(struct dm_target *ti), - unsigned argc, char **argv); + unsigned int argc, char **argv); void dm_dirty_log_destroy(struct dm_dirty_log *log); #endif /* __KERNEL__ */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 6846019da2a5..7595142f3fc5 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -27,7 +27,7 @@ struct page_list { struct page *page; }; -typedef void (*io_notify_fn)(unsigned long error, void *context); +typedef void (*io_notify_fn)(unsigned int long error, void *context); enum dm_io_mem_type { DM_IO_PAGE_LIST,/* Page list */ @@ -39,7 +39,7 @@ enum dm_io_mem_type { struct dm_io_memory { enum dm_io_mem_type type; - unsigned offset; + unsigned int offset; union { struct page_list *pl; @@ -79,8 +79,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * Each bit in the optional 'sync_error_bits' bitset indicates whether an * error occurred doing io to the corresponding region. */ -int dm_io(struct dm_io_request *io_req, unsigned num_regions, - struct dm_io_region *region, unsigned long *sync_error_bits); +int dm_io(struct dm_io_request *io_req, unsigned int num_regions, + struct dm_io_region *region, unsigned int long *sync_error_bits); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 6b4ece85a2a0..51fb1af0b63e 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -24,11 +24,11 @@ #define DM_KCOPYD_WRITE_SEQ 2 struct dm_kcopyd_throttle { - unsigned throttle; - unsigned num_io_jobs; - unsigned io_period; - unsigned total_period; - unsigned last_jiffies; + unsigned int throttle; + unsigned int num_io_jobs; + unsigned int io_period; + unsigned int total_period; + unsigned int last_jiffies; }; /* @@ -61,12 +61,12 @@ void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc); * read_err is a boolean, * write_err is a bitset, with 1 bit for each destination region */ -typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, +typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned int long write_err, void *context); void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context); + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context); /* * Prepare a callback and submit it via the kcopyd thread. @@ -81,11 +81,11 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, */ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, dm_kcopyd_notify_fn fn, void *context); -void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); +void dm_kcopyd_do_callback(void *job, int read_err, unsigned int long write_err); void dm_kcopyd_zero(struct dm_kcopyd_client *kc, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context); + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_KCOPYD_H */ diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h index 095541c59123..4086098a0d8e 100644 --- a/include/linux/dm-region-hash.h +++ b/include/linux/dm-region-hash.h @@ -38,7 +38,7 @@ struct dm_region_hash *dm_region_hash_create( struct bio_list *bios), void (*wakeup_workers)(void *context), void (*wakeup_all_recovery_waiters)(void *context), - sector_t target_begin, unsigned max_recovery, + sector_t target_begin, unsigned int max_recovery, struct dm_dirty_log *log, uint32_t region_size, region_t nr_regions); void dm_region_hash_destroy(struct dm_region_hash *rh); -- cgit v1.2.3 From 44bc08ed63db7a852bd1ba16611b700ee666091c Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 1 Feb 2023 21:51:04 +0100 Subject: dm: enclose complex macros into parentheses where possible Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 9881e772b68c..aa68267bc36f 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -613,8 +613,7 @@ void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); #define DMDEBUG(fmt, ...) pr_debug(DM_FMT(fmt), ##__VA_ARGS__) #define DMDEBUG_LIMIT(fmt, ...) pr_debug_ratelimited(DM_FMT(fmt), ##__VA_ARGS__) -#define DMEMIT(x...) sz += ((sz >= maxlen) ? \ - 0 : scnprintf(result + sz, maxlen - sz, x)) +#define DMEMIT(x...) (sz += ((sz >= maxlen) ? 0 : scnprintf(result + sz, maxlen - sz, x))) #define DMEMIT_TARGET_NAME_VERSION(y) \ DMEMIT("target_name=%s,target_version=%u.%u.%u", \ -- cgit v1.2.3 From a4a82ce3d24d4409143a7b7b980072ada6e20b2a Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 26 Jan 2023 15:48:30 +0100 Subject: dm: correct block comments format. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 18 ++++++++++++------ include/linux/dm-dirty-log.h | 2 +- include/linux/dm-region-hash.h | 6 ++++-- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index aa68267bc36f..7975483816e4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -434,10 +434,12 @@ const char *dm_shift_arg(struct dm_arg_set *as); */ void dm_consume_args(struct dm_arg_set *as, unsigned int num_args); -/*----------------------------------------------------------------- +/* + *---------------------------------------------------------------- * Functions for creating and manipulating mapped devices. * Drop the reference with dm_put when you finish with the object. - *---------------------------------------------------------------*/ + *---------------------------------------------------------------- + */ /* * DM_ANY_MINOR chooses the next available minor number. @@ -518,9 +520,11 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md); int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo); int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); -/*----------------------------------------------------------------- +/* + *--------------------------------------------------------------- * Functions for manipulating device-mapper tables. - *---------------------------------------------------------------*/ + *--------------------------------------------------------------- + */ /* * First create an empty table. @@ -594,9 +598,11 @@ struct dm_table *dm_swap_table(struct mapped_device *md, */ void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); -/*----------------------------------------------------------------- +/* + *--------------------------------------------------------------- * Macros. - *---------------------------------------------------------------*/ + *--------------------------------------------------------------- + */ #define DM_NAME "device-mapper" #define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index a3eb7490d205..0b10faedb26a 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -97,7 +97,7 @@ struct dm_dirty_log_type { * Do not confuse this function with 'in_sync()', one * tells you if an area is synchronised, the other * assigns recovery work. - */ + */ int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); /* diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h index 4086098a0d8e..3079ed93dd2d 100644 --- a/include/linux/dm-region-hash.h +++ b/include/linux/dm-region-hash.h @@ -13,9 +13,11 @@ #include -/*----------------------------------------------------------------- +/* + *---------------------------------------------------------------- * Region hash - *----------------------------------------------------------------*/ + *---------------------------------------------------------------- + */ struct dm_region_hash; struct dm_region; -- cgit v1.2.3 From 1231039db31cf0703996d0b1797c2702e25a110a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2023 19:33:04 +0100 Subject: Revert "blk-cgroup: move the cgroup information to struct gendisk" This reverts commit 3f13ab7c80fdb0ada86a8e3e818960bc1ccbaa59 as a patch it depends on caused a few problems. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230214183308.1658775-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79aec4ebadb9..b9637d63e6f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -163,12 +163,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; -#ifdef CONFIG_BLK_CGROUP - DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); - struct blkcg_gq *root_blkg; - struct list_head blkg_list; - struct mutex blkcg_mutex; -#endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -487,6 +481,12 @@ struct request_queue { struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; +#ifdef CONFIG_BLK_CGROUP + DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); + struct blkcg_gq *root_blkg; + struct list_head blkg_list; + struct mutex blkcg_mutex; +#endif struct queue_limits limits; -- cgit v1.2.3 From 73af3711c7028286136bb7e9422b19f5a016626d Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 30 Nov 2022 15:12:50 +0200 Subject: net/mlx5: Lag, set different uplink vport metadata in multiport eswitch mode In a follow-up commit multiport eswitch mode will use a shared fdb. In shared fdb there is a single eswitch fdb and traffic could come from any port. to distinguish between the ports set a different metadata per uplink port. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ecd3b5448fe9..a4bb5842a948 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -217,6 +217,7 @@ struct mlx5_rsc_debug { enum mlx5_dev_event { MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ MLX5_DEV_EVENT_PORT_AFFINITY = 129, + MLX5_DEV_EVENT_MULTIPORT_ESW = 130, }; enum mlx5_port_status { -- cgit v1.2.3 From 27f9e0ccb6da0857a323c1d19a23b6666ddefe05 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Dec 2022 15:32:52 +0200 Subject: net/mlx5: Lag, Add single RDMA device in multiport mode In MultiPort E-Switch mode a single RDMA is created. This device has multiple RDMA ports that represent the uplink ports that are connected to the E-Switch. Account for this when creating the RDMA device so it has an additional port for the non native uplink. As a side effect of this patch, use shared fdb in multiport eswitch mode. Signed-off-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4bb5842a948..c9259350cdfc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1162,6 +1162,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); -- cgit v1.2.3 From c30f3faa2a81bc6d7d05967eb07bbe3bd5b359e6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 17 Jan 2023 14:44:54 +0100 Subject: net/mlx5e: Move dl_port to struct mlx5e_dev No need to have dl_port which is tightly coupled with mlx5e code in mlx5 core code. Move it to struct mlx5e_dev and loose mlx5e_devlink_get_dl_port() helper. Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c9259350cdfc..a170c8565779 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -679,7 +679,6 @@ struct mlx5e_resources { u32 mkey; struct mlx5_sq_bfreg bfreg; } hw_objs; - struct devlink_port dl_port; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; struct mlx5_crypto_dek_priv *dek_priv; -- cgit v1.2.3 From 7368f221e09eac116050475bec5a292a85c5ea8a Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 4 Jan 2023 20:17:52 -0800 Subject: net/mlx5: Introduce new destination type TABLE_TYPE This new destination type supports flow transition between different table types, e.g. from NIC_RX to RDMA_RX or from RDMA_TX to NIC_TX. Signed-off-by: Patrisious Haddad Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2d17b6a6d82d..c3d3a2eef7d4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -315,6 +315,11 @@ enum { MLX5_CMD_OP_GENERAL_END = 0xd00, }; +enum { + MLX5_FT_NIC_RX_2_NIC_RX_RDMA = BIT(0), + MLX5_FT_NIC_TX_RDMA_2_NIC_TX = BIT(1), +}; + struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; @@ -1903,7 +1908,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_e0[0xc0]; - u8 reserved_at_1a0[0xb]; + u8 flow_table_type_2_type[0x8]; + u8 reserved_at_1a8[0x3]; u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; @@ -1927,6 +1933,7 @@ enum mlx5_ifc_flow_destination_type { MLX5_IFC_FLOW_DESTINATION_TYPE_TIR = 0x2, MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK = 0x8, + MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE = 0xA, }; enum mlx5_flow_table_miss_action { @@ -1941,7 +1948,8 @@ struct mlx5_ifc_dest_format_struct_bits { u8 destination_eswitch_owner_vhca_id_valid[0x1]; u8 packet_reformat[0x1]; - u8 reserved_at_22[0xe]; + u8 reserved_at_22[0x6]; + u8 destination_table_type[0x8]; u8 destination_eswitch_owner_vhca_id[0x10]; }; -- cgit v1.2.3 From 4f226b71f5edc33843e21f92ebe97b1561804d52 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 4 Jan 2023 20:17:53 -0800 Subject: net/mlx5: Implement new destination type TABLE_TYPE Implement new destination type to support flow transition between different table types. e.g. from NIC_RX to RDMA_RX or from RDMA_TX to NIC_TX. The new destination is described in the tracepoint as follows: "mlx5_fs_add_rule: rule=00000000d53cd0ed fte=0000000048a8a6ed index=0 sw_action=<> [dst] flow_table_type=7 id:262152" Signed-off-by: Mark Zhang Signed-off-by: Patrisious Haddad Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index ba6958b49a8e..1d2a0638ae74 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -51,6 +51,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_COUNTER, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, MLX5_FLOW_DESTINATION_TYPE_RANGE, + MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE, }; enum { -- cgit v1.2.3 From f91ddd3aa4b313aa4b792d1588ccb63c3e4ace0b Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 4 Jan 2023 20:17:54 -0800 Subject: net/mlx5: Add IPSec priorities in RDMA namespaces Add IPSec flow steering priorities in RDMA namespaces. This allows adding tables/rules to forward RoCEv2 traffic to the IPSec crypto tables in NIC_TX domain, and accept RoCEv2 traffic from NIC_RX domain. Signed-off-by: Mark Zhang Signed-off-by: Patrisious Haddad Reviewed-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1d2a0638ae74..d72a09a3798c 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -103,6 +103,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_PORT_SEL, MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS, MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS, + MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC, + MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC, }; enum { -- cgit v1.2.3 From ade1229caed9433921b69e20bd6fadf1bba9558e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Feb 2023 15:01:21 +0100 Subject: dma-mapping: no need to pass a bus_type into get_arch_dma_ops() The get_arch_dma_ops() arch-specific function never does anything with the struct bus_type that is passed into it, so remove it entirely as it is not needed. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: Marek Szyprowski Cc: Robin Murphy Cc: Konrad Rzeszutek Wilk Cc: linux-alpha@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: iommu@lists.linux.dev Cc: linux-arch@vger.kernel.org Acked-by: Christoph Hellwig Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230214140121.131859-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/dma-map-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index d678afeb8a13..41bf4bdb117a 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -90,7 +90,7 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev->dma_ops) return dev->dma_ops; - return get_arch_dma_ops(dev->bus); + return get_arch_dma_ops(); } static inline void set_dma_ops(struct device *dev, -- cgit v1.2.3 From d16c0cd27331179daa86a3a489f50ce409121c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Mon, 10 Oct 2022 08:43:59 +0200 Subject: docs: driver-api: virtio: virtio on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Basic doc about Virtio on Linux and a short tutorial on Virtio drivers. includes the following fixup: virtio: fix virtio_config_ops kerneldocs Fixes two warning messages when building htmldocs: warning: duplicate section name 'Note' warning: expecting prototype for virtio_config_ops(). Prototype was for vq_callback_t() instead Message-Id: <20221010064359.1324353-2-ricardo.canuelo@collabora.com> Signed-off-by: Ricardo Cañuelo Reviewed-by: Cornelia Huck Message-Id: <20221220100035.2712449-1-ricardo.canuelo@collabora.com> Reported-by: Stephen Rothwell Reviewed-by: Bagas Sanjaya Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4b517649cfe8..2b3438de2c4d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -16,8 +16,10 @@ struct virtio_shm_region { u64 len; }; +typedef void vq_callback_t(struct virtqueue *); + /** - * virtio_config_ops - operations for configuring a virtio device + * struct virtio_config_ops - operations for configuring a virtio device * Note: Do not assume that a transport implements all of the operations * getting/setting a value as a simple read/write! Generally speaking, * any of @get/@set, @get_status/@set_status, or @get_features/ @@ -69,7 +71,8 @@ struct virtio_shm_region { * vdev: the virtio_device * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. - * Note: despite the name this can be called any number of times. + * Note that despite the name this can be called any number of + * times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device @@ -91,7 +94,6 @@ struct virtio_shm_region { * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. */ -typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); -- cgit v1.2.3 From 88f94c782b0ee2297685764f942e7c176d6b89aa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 14 Feb 2023 22:41:19 +0100 Subject: mmc: core: support setting card detect interrupt from drivers On certain platforms like Amlogic Meson gpiod_to_irq() isn't supported due to the design of gpio / interrupt controller. Therefore provide an option for drivers to pass the card detect interrupt number (retrieved e.g. from device tree) to mmc core. Suggested-by refers to the mechanism to pass and store the interrupt. Suggested-by: Ulf Hansson Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/5777f38b-465f-ce48-a87f-5eb8b3c57b0a@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 4ae2f2908f99..5d3d15e97868 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -15,6 +15,7 @@ struct mmc_host; int mmc_gpio_get_ro(struct mmc_host *host); int mmc_gpio_get_cd(struct mmc_host *host); +void mmc_gpio_set_cd_irq(struct mmc_host *host, int irq); int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce); -- cgit v1.2.3 From fd8f8ede239bc0531aca65f408ecf44d6755c558 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:49:58 +0100 Subject: block: export bio_split_rw bio_split_rw can be used by file systems to split and incoming write bio into multiple bios fitting the hardware limit for use as ZONE_APPEND bios. Export it for initial use in btrfs. Reviewed-by: Josef Bacik Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/bio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index c1da63f6c808..d766be7152e1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -12,6 +12,8 @@ #define BIO_MAX_VECS 256U +struct queue_limits; + static inline unsigned int bio_max_segs(unsigned int nr_segs) { return min(nr_segs, BIO_MAX_VECS); @@ -375,6 +377,8 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip, void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); +struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, + unsigned *segs, struct bio_set *bs, unsigned max_bytes); /** * bio_next_split - get next @sectors from a bio, splitting if necessary -- cgit v1.2.3 From 8e81aa16a42169faae1ba15cd648cc8bb83eaa48 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:31 +0100 Subject: iomap: remove IOMAP_F_ZONE_APPEND No users left now that btrfs takes REQ_OP_WRITE bios from iomap and splits and converts them to REQ_OP_ZONE_APPEND internally. Reviewed-by: Josef Bacik Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- include/linux/iomap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0983dfc9a203..fca43a4bd96b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -58,8 +58,7 @@ struct vm_fault; #define IOMAP_F_SHARED (1U << 2) #define IOMAP_F_MERGED (1U << 3) #define IOMAP_F_BUFFER_HEAD (1U << 4) -#define IOMAP_F_ZONE_APPEND (1U << 5) -#define IOMAP_F_XATTR (1U << 6) +#define IOMAP_F_XATTR (1U << 5) /* * Flags set by the core iomap code during operations: -- cgit v1.2.3 From 997849c4b969034e225153f41026657def66d286 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 15 Feb 2023 16:21:31 +0800 Subject: bpf: Zeroing allocated object from slab in bpf memory allocator Currently the freed element in bpf memory allocator may be immediately reused, for htab map the reuse will reinitialize special fields in map value (e.g., bpf_spin_lock), but lookup procedure may still access these special fields, and it may lead to hard-lockup as shown below: NMI backtrace for cpu 16 CPU: 16 PID: 2574 Comm: htab.bin Tainted: G L 6.1.0+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), RIP: 0010:queued_spin_lock_slowpath+0x283/0x2c0 ...... Call Trace: copy_map_value_locked+0xb7/0x170 bpf_map_copy_value+0x113/0x3c0 __sys_bpf+0x1c67/0x2780 __x64_sys_bpf+0x1c/0x20 do_syscall_64+0x30/0x60 entry_SYSCALL_64_after_hwframe+0x46/0xb0 ...... For htab map, just like the preallocated case, these is no need to initialize these special fields in map value again once these fields have been initialized. For preallocated htab map, these fields are initialized through __GFP_ZERO in bpf_map_area_alloc(), so do the similar thing for non-preallocated htab in bpf memory allocator. And there is no need to use __GFP_ZERO for per-cpu bpf memory allocator, because __alloc_percpu_gfp() does it implicitly. Fixes: 0fd7c5d43339 ("bpf: Optimize call_rcu in non-preallocated hash map.") Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20230215082132.3856544-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index be34f7deb6c3..520b238abd5a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -363,6 +363,13 @@ static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); } +/* 'dst' must be a temporary buffer and should not point to memory that is being + * used in parallel by a bpf program or bpf syscall, otherwise the access from + * the bpf program or bpf syscall may be corrupted by the reinitialization, + * leading to weird problems. Even 'dst' is newly-allocated from bpf memory + * allocator, it is still possible for 'dst' to be used in parallel by a bpf + * program or bpf syscall. + */ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { bpf_obj_init(map->field_offs, dst); -- cgit v1.2.3 From 1a30a6b25f263686dbf2028d56041ac012b10dcb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 15 Feb 2023 14:41:14 -0800 Subject: wifi: brcmfmac: p2p: Introduce generic flexible array frame member Silence run-time memcpy() false positive warning when processing management frames: memcpy: detected field-spanning write (size 27) of single field "&mgmt_frame->u" at drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c:1469 (size 26) Due to this (soon to be fixed) GCC bug[1], FORTIFY_SOURCE (via __builtin_dynamic_object_size) doesn't recognize that the union may end with a flexible array, and returns "26" (the fixed size of the union), rather than the remaining size of the allocation. Add an explicit flexible array member and set it as the destination here, so that we get the correct coverage for the memcpy(). [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 Reported-by: Ard Biesheuvel Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Johannes Berg Cc: "Jason A. Donenfeld" Cc: Greg Kroah-Hartman Cc: "Darrick J. Wong" Cc: Colin Ian King Cc: Brian Henriquez Cc: linux-wireless@vger.kernel.org Cc: brcm80211-dev-list.pdl@broadcom.com Cc: SHA-cyfmac-dev-list@infineon.com Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230215224110.never.022-kees@kernel.org [rename 'frame' to 'body'] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 80d6308dea06..2463bdd2a382 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1356,6 +1356,7 @@ struct ieee80211_mgmt { } __packed wnm_timing_msr; } u; } __packed action; + DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ } u; } __packed __aligned(2); -- cgit v1.2.3 From b20b8aec6ffc07bb547966b356780cd344f20f5b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Feb 2023 09:31:39 +0200 Subject: devlink: Fix netdev notifier chain corruption Cited commit changed devlink to register its netdev notifier block on the global netdev notifier chain instead of on the per network namespace one. However, when changing the network namespace of the devlink instance, devlink still tries to unregister its notifier block from the chain of the old namespace and register it on the chain of the new namespace. This results in corruption of the notifier chains, as the same notifier block is registered on two different chains: The global one and the per network namespace one. In turn, this causes other problems such as the inability to dismantle namespaces due to netdev reference count issues. Fix by preventing devlink from moving its notifier block between namespaces. Reproducer: # echo "10 1" > /sys/bus/netdevsim/new_device # ip netns add test123 # devlink dev reload netdevsim/netdevsim10 netns test123 # ip netns del test123 [ 71.935619] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 71.938348] leaked reference. Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230215073139.1360108-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aad12a179e54..e6e02184c25a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2839,8 +2839,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); -- cgit v1.2.3 From 007ed7900aae514986770f6e14069ebd99c4a4c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 12 Feb 2023 23:03:24 -0800 Subject: i3c: fix device.h kernel-doc warnings Fix all kernel-doc warnings in : include/linux/i3c/device.h:27: warning: contents before sections include/linux/i3c/device.h:196: warning: Excess function parameter 'dev' description in 'dev_to_i3cdev' Fixes: fa838c8ce537 ("i3c: move dev_to_i3cdev() to use container_of_const()") Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Randy Dunlap Cc: Boris Brezillon Cc: Greg Kroah-Hartman Cc: Alexandre Belloni Cc: linux-i3c@lists.infradead.org Link: https://lore.kernel.org/r/20230213070324.1564-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/i3c/device.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index ce115ef08fec..90fa83464f00 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -18,17 +18,18 @@ /** * enum i3c_error_code - I3C error codes * + * @I3C_ERROR_UNKNOWN: unknown error, usually means the error is not I3C + * related + * @I3C_ERROR_M0: M0 error + * @I3C_ERROR_M1: M1 error + * @I3C_ERROR_M2: M2 error + * * These are the standard error codes as defined by the I3C specification. * When -EIO is returned by the i3c_device_do_priv_xfers() or * i3c_device_send_hdr_cmds() one can check the error code in * &struct_i3c_priv_xfer.err or &struct i3c_hdr_cmd.err to get a better idea of * what went wrong. * - * @I3C_ERROR_UNKNOWN: unknown error, usually means the error is not I3C - * related - * @I3C_ERROR_M0: M0 error - * @I3C_ERROR_M1: M1 error - * @I3C_ERROR_M2: M2 error */ enum i3c_error_code { I3C_ERROR_UNKNOWN = 0, @@ -189,7 +190,7 @@ struct device *i3cdev_to_dev(struct i3c_device *i3cdev); /** * dev_to_i3cdev() - Returns the I3C device containing @dev - * @dev: device object + * @__dev: device object * * Return: a pointer to an I3C device object. */ -- cgit v1.2.3 From 5e7b9a6ae8c352819a2d998a065910b536de0e8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Feb 2023 07:24:19 +0100 Subject: swiotlb: remove swiotlb_max_segment swiotlb_max_segment has always been a bogus API, so remove it now that the remaining callers are gone. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/swiotlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 35bc4e281c21..bcef10e20ea4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -121,7 +121,6 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); -unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); @@ -140,10 +139,6 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) static inline void swiotlb_exit(void) { } -static inline unsigned int swiotlb_max_segment(void) -{ - return 0; -} static inline size_t swiotlb_max_mapping_size(struct device *dev) { return SIZE_MAX; -- cgit v1.2.3 From 5720a18baa4686d56d0a235e6ecbcc55f8d716d7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 16 Feb 2023 11:34:19 -0800 Subject: hwmon: Deprecate [devm_]hwmon_device_register_with_groups Even though the hardware monitoring documentation already stated that new drivers should use [devm_]devm_hwmon_device_register_with_info() to register with the hardware monitoring subsystem, we still get submissions for new drivers using the older APIs. There is no benefit to use those APIs. On the contrary, using the older APIs results in substantially larger code size. Explicitly deprecate [devm_]hwmon_device_register_with_groups() to ensure that all new drivers use the latest API. Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 14325f93c6b2..c1b62384b6ee 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -436,6 +436,10 @@ struct hwmon_chip_info { /* hwmon_device_register() is deprecated */ struct device *hwmon_device_register(struct device *dev); +/* + * hwmon_device_register_with_groups() and + * devm_hwmon_device_register_with_groups() are deprecated. + */ struct device * hwmon_device_register_with_groups(struct device *dev, const char *name, void *drvdata, -- cgit v1.2.3 From 61d03862734360aad470019f160d484403a3923e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 16 Feb 2023 14:12:38 +0000 Subject: arm_pmu: fix event CPU filtering Janne reports that perf has been broken on Apple M1 as of commit: bd27568117664b8b ("perf: Rewrite core context handling") That commit replaced the pmu::filter_match() callback with pmu::filter(), whose return value has the opposite polarity, with true implying events should be ignored rather than scheduled. While an attempt was made to update the logic in armv8pmu_filter() and armpmu_filter() accordingly, the return value remains inverted in a couple of cases: * If the arm_pmu does not have an arm_pmu::filter() callback, armpmu_filter() will always return whether the CPU is supported rather than whether the CPU is not supported. As a result, the perf core will not schedule events on supported CPUs, resulting in a loss of events. Additionally, the perf core will attempt to schedule events on unsupported CPUs, but this will be rejected by armpmu_add(), which may result in a loss of events from other PMUs on those unsupported CPUs. * If the arm_pmu does have an arm_pmu::filter() callback, and armpmu_filter() is called on a CPU which is not supported by the arm_pmu, armpmu_filter() will return false rather than true. As a result, the perf core will attempt to schedule events on unsupported CPUs, but this will be rejected by armpmu_add(), which may result in a loss of events from other PMUs on those unsupported CPUs. This means a loss of events can be seen with any arm_pmu driver, but with the ARMv8 PMUv3 driver (which is the only arm_pmu driver with an arm_pmu::filter() callback) the event loss will be more limited and may go unnoticed, which is how this issue evaded testing so far. Fix the CPU filtering by performing this consistently in armpmu_filter(), and remove the redundant arm_pmu::filter() callback and armv8pmu_filter() implementation. Commit bd2756811766 also silently removed the CHAIN event filtering from armv8pmu_filter(), which will be addressed by a separate patch without using the filter callback. Fixes: bd2756811766 ("perf: Rewrite core context handling") Reported-by: Janne Grunau Link: https://lore.kernel.org/asahi/20230215-arm_pmu_m1_regression-v1-1-f5a266577c8d@jannau.net/ Signed-off-by: Mark Rutland Cc: Will Deacon Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Asahi Lina Cc: Eric Curtin Tested-by: Janne Grunau Link: https://lore.kernel.org/r/20230216141240.3833272-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ef914a600087..525b5d64e394 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -100,7 +100,6 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - bool (*filter)(struct pmu *pmu, int cpu); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 -- cgit v1.2.3 From 483e6ea1b35aaeffd9b6e6e660d756be49c2a9f5 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 16 Feb 2023 22:31:59 +0000 Subject: regmap-irq: Remove unused type_invert flag type_invert is deprecated and no longer used; it can now be removed. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20230216223200.150679-1-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f26432dc02ef..c2233aeb3843 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1555,8 +1555,6 @@ struct regmap_irq_chip_data; * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. - * @type_invert: Invert the type flags. Deprecated, use config registers - * instead. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge * or low/high level interrupts and they should be combined into @@ -1633,7 +1631,6 @@ struct regmap_irq_chip { unsigned int clear_ack:1; unsigned int wake_invert:1; unsigned int runtime_pm:1; - unsigned int type_invert:1; unsigned int type_in_mask:1; unsigned int clear_on_unmask:1; unsigned int not_fixed_stride:1; -- cgit v1.2.3 From c74e7af1245b2073930afc9a2a340d91e08f0f14 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 16 Feb 2023 22:32:00 +0000 Subject: regmap-irq: Remove unused mask_invert flag mask_invert is deprecated and no longer used; it can now be removed. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20230216223200.150679-2-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c2233aeb3843..4d10790adeb0 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1540,9 +1540,6 @@ struct regmap_irq_chip_data; * @config_base: Base address for IRQ type config regs. If null unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. - * @mask_invert: Inverted mask register: cleared bits are masked out. - * Deprecated; prefer describing an inverted mask register as - * an unmask register. * @mask_unmask_non_inverted: Controls mask bit inversion for chips that set * both @mask_base and @unmask_base. If false, mask and unmask bits are * inverted (which is deprecated behavior); if true, bits will not be @@ -1624,7 +1621,6 @@ struct regmap_irq_chip { const unsigned int *config_base; unsigned int irq_reg_stride; unsigned int init_ack_masked:1; - unsigned int mask_invert:1; unsigned int mask_unmask_non_inverted:1; unsigned int use_ack:1; unsigned int ack_invert:1; -- cgit v1.2.3 From 15ef6a982f40a2b53b057dad24f00c3fb43e7e70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:49 +0100 Subject: lib/stackdepot: put functions in logical order Patch series "lib/stackdepot: fixes and clean-ups", v2. A set of fixes, comments, and clean-ups I came up with while reading the stack depot code. This patch (of 18): Put stack depot functions' declarations and definitions in a more logical order: 1. Functions that save stack traces into stack depot. 2. Functions that fetch and print stack traces. 3. stack_depot_get_extra_bits that operates on stack depot handles and does not interact with the stack depot storage. No functional changes. Link: https://lkml.kernel.org/r/cover.1676063693.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/daca1319b665d826b94c596b992a8d8117846147.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 9ca7798d7a31..1296a6eeaec0 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -14,17 +14,13 @@ #include typedef u32 depot_stack_handle_t; + /* * Number of bits in the handle that stack depot doesn't use. Users may store * information in them. */ #define STACK_DEPOT_EXTRA_BITS 5 -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - unsigned int extra_bits, - gfp_t gfp_flags, bool can_alloc); - /* * Every user of stack depot has to call stack_depot_init() during its own init * when it's decided that it will be calling stack_depot_save() later. This is @@ -59,17 +55,22 @@ static inline void stack_depot_want_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + unsigned int extra_bits, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); -unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); +void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); -void stack_depot_print(depot_stack_handle_t stack); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif -- cgit v1.2.3 From 1c0310add78e7e47e3357c24369b61453a5a72eb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:51 +0100 Subject: lib/stackdepot, mm: rename stack_depot_want_early_init Rename stack_depot_want_early_init to stack_depot_request_early_init. The old name is confusing, as it hints at returning some kind of intention of stack depot. The new name reflects that this function requests an action from stack depot instead. No functional changes. [akpm@linux-foundation.org: update mm/kmemleak.c] Link: https://lkml.kernel.org/r/359f31bf67429a06e630b4395816a967214ef753.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 1296a6eeaec0..c4e3abc16b16 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -31,26 +31,26 @@ typedef u32 depot_stack_handle_t; * enabled as part of mm_init(), for subsystems where it's known at compile time * that stack depot will be used. * - * Another alternative is to call stack_depot_want_early_init(), when the + * Another alternative is to call stack_depot_request_early_init(), when the * decision to use stack depot is taken e.g. when evaluating kernel boot * parameters, which precedes the enablement point in mm_init(). * - * stack_depot_init() and stack_depot_want_early_init() can be called regardless - * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print - * functions should only be called from code that makes sure CONFIG_STACKDEPOT - * is enabled. + * stack_depot_init() and stack_depot_request_early_init() can be called + * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual + * save/fetch/print functions should only be called from code that makes sure + * CONFIG_STACKDEPOT is enabled. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); -void __init stack_depot_want_early_init(void); +void __init stack_depot_request_early_init(void); /* This is supposed to be called only from mm_init() */ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } -static inline void stack_depot_want_early_init(void) { } +static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif -- cgit v1.2.3 From 36aa1e6779c3c6f8e0d4552544214f5cffe3c287 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:03 +0100 Subject: lib/stacktrace, kasan, kmsan: rework extra_bits interface The current implementation of the extra_bits interface is confusing: passing extra_bits to __stack_depot_save makes it seem that the extra bits are somehow stored in stack depot. In reality, they are only embedded into a stack depot handle and are not used within stack depot. Drop the extra_bits argument from __stack_depot_save and instead provide a new stack_depot_set_extra_bits function (similar to the exsiting stack_depot_get_extra_bits) that saves extra bits into a stack depot handle. Update the callers of __stack_depot_save to use the new interace. This change also fixes a minor issue in the old code: __stack_depot_save does not return NULL if saving stack trace fails and extra_bits is used. Link: https://lkml.kernel.org/r/317123b5c05e2f82854fc55d8b285e0869d3cb77.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c4e3abc16b16..267f4b2634ee 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -57,7 +57,6 @@ static inline int stack_depot_early_init(void) { return 0; } depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, - unsigned int extra_bits, gfp_t gfp_flags, bool can_alloc); depot_stack_handle_t stack_depot_save(unsigned long *entries, @@ -71,6 +70,9 @@ void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +depot_stack_handle_t __must_check stack_depot_set_extra_bits( + depot_stack_handle_t handle, unsigned int extra_bits); + unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif -- cgit v1.2.3 From b232b9995a6dbaafe19d07d81acc039bc84bd569 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:05 +0100 Subject: lib/stackdepot: various comments clean-ups Clean up comments in include/linux/stackdepot.h and lib/stackdepot.c: 1. Rework the initialization comment in stackdepot.h. 2. Rework the header comment in stackdepot.c. 3. Various clean-ups for other comments. Also adjust whitespaces for find_stack and depot_alloc_stack call sites. No functional changes. Link: https://lkml.kernel.org/r/5836231b7954355e2311fc9b5870f697ea8e1f7d.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 267f4b2634ee..afdf8ee7b597 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * A generic stack depot implementation + * Stack depot - a stack trace storage that avoids duplication. * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * - * Based on code by Dmitry Chernenkov. + * Based on the code by Dmitry Chernenkov. */ #ifndef _LINUX_STACKDEPOT_H @@ -17,35 +17,37 @@ typedef u32 depot_stack_handle_t; /* * Number of bits in the handle that stack depot doesn't use. Users may store - * information in them. + * information in them via stack_depot_set/get_extra_bits. */ #define STACK_DEPOT_EXTRA_BITS 5 /* - * Every user of stack depot has to call stack_depot_init() during its own init - * when it's decided that it will be calling stack_depot_save() later. This is - * recommended for e.g. modules initialized later in the boot process, when - * slab_is_available() is true. + * Using stack depot requires its initialization, which can be done in 3 ways: * - * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot - * enabled as part of mm_init(), for subsystems where it's known at compile time - * that stack depot will be used. + * 1. Selecting CONFIG_STACKDEPOT_ALWAYS_INIT. This option is suitable in + * scenarios where it's known at compile time that stack depot will be used. + * Enabling this config makes the kernel initialize stack depot in mm_init(). * - * Another alternative is to call stack_depot_request_early_init(), when the - * decision to use stack depot is taken e.g. when evaluating kernel boot - * parameters, which precedes the enablement point in mm_init(). + * 2. Calling stack_depot_request_early_init() during early boot, before + * stack_depot_early_init() in mm_init() completes. For example, this can + * be done when evaluating kernel boot parameters. + * + * 3. Calling stack_depot_init(). Possible after boot is complete. This option + * is recommended for modules initialized later in the boot process, after + * mm_init() completes. * * stack_depot_init() and stack_depot_request_early_init() can be called - * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual - * save/fetch/print functions should only be called from code that makes sure - * CONFIG_STACKDEPOT is enabled. + * regardless of whether CONFIG_STACKDEPOT is enabled and are no-op when this + * config is disabled. The save/fetch/print stack depot functions can only be + * called from the code that makes sure CONFIG_STACKDEPOT is enabled _and_ + * initializes stack depot via one of the ways listed above. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); void __init stack_depot_request_early_init(void); -/* This is supposed to be called only from mm_init() */ +/* Must be only called from mm_init(). */ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } -- cgit v1.2.3 From 0621d160f1003a8aedd3628133568ecffdd724f7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:06 +0100 Subject: lib/stackdepot: move documentation comments to stackdepot.h Move all interface- and usage-related documentation comments to include/linux/stackdepot.h. It makes sense to have them in the header where they are available to the interface users. [akpm@linux-foundation.org: grammar fix, per Alexander] Link: https://lkml.kernel.org/r/fbfee41495b306dd8881f9b1c1b80999c885e82f.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index afdf8ee7b597..e58306783d8e 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -2,6 +2,17 @@ /* * Stack depot - a stack trace storage that avoids duplication. * + * Stack depot is intended to be used by subsystems that need to store and + * later retrieve many potentially duplicated stack traces without wasting + * memory. + * + * For example, KASAN needs to save allocation and free stack traces for each + * object. Storing two stack traces per object requires a lot of memory (e.g. + * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free + * stack traces often repeat, using stack depot allows to save about 100x space. + * + * Stack traces are never removed from the stack depot. + * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * @@ -57,24 +68,100 @@ static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +/** + * __stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * @can_alloc: Allocate stack pools (increased chance of failure if false) + * + * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is + * %true, stack depot can replenish the stack pools in case no space is left + * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids + * any allocations and fails if no space is left to store the stack trace. + * + * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. + * + * Context: Any context, but setting @can_alloc to %false is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case for contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). + * + * Return: Handle of the stack struct stored in depot, 0 on failure + */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/** + * stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * + * Context: Contexts where allocations via alloc_pages() are allowed. + * See __stack_depot_save() for more details. + * + * Return: Handle of the stack trace stored in depot, 0 on failure + */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); +/** + * stack_depot_fetch - Fetch a stack trace from stack depot + * + * @handle: Stack depot handle returned from stack_depot_save() + * @entries: Pointer to store the address of the stack trace + * + * Return: Number of frames for the fetched stack + */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +/** + * stack_depot_print - Print a stack trace from stack depot + * + * @stack: Stack depot handle returned from stack_depot_save() + */ void stack_depot_print(depot_stack_handle_t stack); +/** + * stack_depot_snprint - Print a stack trace from stack depot into a buffer + * + * @handle: Stack depot handle returned from stack_depot_save() + * @buf: Pointer to the print buffer + * @size: Size of the print buffer + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed + */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +/** + * stack_depot_set_extra_bits - Set extra bits in a stack depot handle + * + * @handle: Stack depot handle returned from stack_depot_save() + * @extra_bits: Value to set the extra bits + * + * Return: Stack depot handle with extra bits set + * + * Stack depot handles have a few unused bits, which can be used for storing + * user-specific information. These bits are transparent to the stack depot. + */ depot_stack_handle_t __must_check stack_depot_set_extra_bits( depot_stack_handle_t handle, unsigned int extra_bits); +/** + * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle + * + * @handle: Stack depot handle with extra bits saved + * + * Return: Extra bits retrieved from the stack depot handle + */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif -- cgit v1.2.3 From 64c8902ed4418317cd416c566f896bd4a92b2efc Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 13 Feb 2023 20:34:39 +0800 Subject: migrate_pages: split unmap_and_move() to _unmap() and _move() This is a preparation patch to batch the folio unmapping and moving. In this patch, unmap_and_move() is split to migrate_folio_unmap() and migrate_folio_move(). So, we can batch _unmap() and _move() in different loops later. To pass some information between unmap and move, the original unused dst->mapping and dst->private are used. Link: https://lkml.kernel.org/r/20230213123444.155149-5-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Reviewed-by: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Cc: Bharata B Rao Cc: Alistair Popple Cc: Minchan Kim Cc: Mike Kravetz Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton --- include/linux/migrate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index bdff950a8bb4..c88b96b48be7 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -18,6 +18,7 @@ struct migration_target_control; * - zero on page migration success; */ #define MIGRATEPAGE_SUCCESS 0 +#define MIGRATEPAGE_UNMAP 1 /** * struct movable_operations - Driver page migration -- cgit v1.2.3 From f7a449f779608efe1941a0e0c4bd7b5f57000be7 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 13 Feb 2023 11:29:22 -0800 Subject: mm: memcontrol: rename memcg_kmem_enabled() Currently there are two kmem-related helper functions with a confusing semantics: memcg_kmem_enabled() and mem_cgroup_kmem_disabled(). The problem is that an obvious expectation memcg_kmem_enabled() == !mem_cgroup_kmem_disabled(), can be false. mem_cgroup_kmem_disabled() is similar to mem_cgroup_disabled(): it returns true only if CONFIG_MEMCG_KMEM is not set or the kmem accounting is disabled using a boot time kernel option "cgroup.memory=nokmem". It never changes the value dynamically. memcg_kmem_enabled() is different: it always returns false until the first non-root memory cgroup will get online (assuming the kernel memory accounting is enabled). It's goal is to improve the performance on systems without the cgroupfs mounted/memory controller enabled or on the systems with only the root memory cgroup. To make things more obvious and avoid potential bugs, let's rename memcg_kmem_enabled() to memcg_kmem_online(). Link: https://lkml.kernel.org/r/20230213192922.1146370-1-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Muchun Song Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Cc: Dennis Zhou Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 35478695cabf..5567319027d1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1776,24 +1776,24 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page); int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); -extern struct static_key_false memcg_kmem_enabled_key; +extern struct static_key_false memcg_kmem_online_key; -static inline bool memcg_kmem_enabled(void) +static inline bool memcg_kmem_online(void) { - return static_branch_likely(&memcg_kmem_enabled_key); + return static_branch_likely(&memcg_kmem_online_key); } static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { - if (memcg_kmem_enabled()) + if (memcg_kmem_online()) return __memcg_kmem_charge_page(page, gfp, order); return 0; } static inline void memcg_kmem_uncharge_page(struct page *page, int order) { - if (memcg_kmem_enabled()) + if (memcg_kmem_online()) __memcg_kmem_uncharge_page(page, order); } @@ -1814,7 +1814,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { struct mem_cgroup *memcg; - if (!memcg_kmem_enabled()) + if (!memcg_kmem_online()) return; rcu_read_lock(); @@ -1854,7 +1854,7 @@ static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) return NULL; } -static inline bool memcg_kmem_enabled(void) +static inline bool memcg_kmem_online(void) { return false; } -- cgit v1.2.3 From 2954fe60e33da0f4de4d81a4c95c7dddb517d00c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Feb 2023 14:45:22 +0100 Subject: netfilter: let reset rules clean out conntrack entries iptables/nftables support responding to tcp packets with tcp resets. The generated tcp reset packet passes through both output and postrouting netfilter hooks, but conntrack will never see them because the generated skb has its ->nfct pointer copied over from the packet that triggered the reset rule. If the reset rule is used for established connections, this may result in the conntrack entry to be around for a very long time (default timeout is 5 days). One way to avoid this would be to not copy the nf_conn pointer so that the rest packet passes through conntrack too. Problem is that output rules might not have the same conntrack zone setup as the prerouting ones, so its possible that the reset skb won't find the correct entry. Generating a template entry for the skb seems error prone as well. Add an explicit "closing" function that switches a confirmed conntrack entry to closed state and wire this up for tcp. If the entry isn't confirmed, no action is needed because the conntrack entry will never be committed to the table. Reported-by: Russel King Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d8817d381c14..6863e271a9de 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #include void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) @@ -459,6 +461,7 @@ struct nf_ct_hook { bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + void (*set_closing)(struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; -- cgit v1.2.3 From 4b7296aa6c6618d6a6840fbe857e4990f626bd90 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 17 Jan 2023 15:14:49 +0200 Subject: net/mlx5: Expose bits for querying special mkeys Add needed HW bits to query the values of all special mkeys. Link: https://lore.kernel.org/r/080ebb563a9717c15b1ea75d669aede676df386b.1673960981.git.leon@kernel.org Signed-off-by: Or Har-Toov Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c3d3a2eef7d4..67cfac8fbe46 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1485,7 +1485,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 relaxed_ordering_write[0x1]; u8 relaxed_ordering_read[0x1]; u8 log_max_mkey[0x6]; - u8 reserved_at_f0[0x8]; + u8 reserved_at_f0[0x6]; + u8 terminate_scatter_list_mkey[0x1]; + u8 repeated_mkey[0x1]; u8 dump_fill_mkey[0x1]; u8 reserved_at_f9[0x2]; u8 fast_teardown[0x1]; @@ -5210,7 +5212,11 @@ struct mlx5_ifc_query_special_contexts_out_bits { u8 null_mkey[0x20]; - u8 reserved_at_a0[0x60]; + u8 terminate_scatter_list_mkey[0x20]; + + u8 repeated_mkey[0x20]; + + u8 reserved_at_a0[0x20]; }; struct mlx5_ifc_query_special_contexts_in_bits { -- cgit v1.2.3 From a419bfb7632095410adc3aecb1e863568f049add Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 17 Jan 2023 15:14:50 +0200 Subject: net/mlx5: Change define name for 0x100 lkey value Change define of 0x100 lkey value from MLX5_INVALID_LKEY to be MLX5_TERMINATE_SCATTER_LIST_LKEY as 0x100 is the value of terminate_scatter_list_mkey. Link: https://lore.kernel.org/r/3a116dc3fbae4cb6b76a63d27d418830b06ade0c.1673960981.git.leon@kernel.org Signed-off-by: Or Har-Toov Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/qp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4657d5c54abe..df55fbb65717 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -36,7 +36,7 @@ #include #include -#define MLX5_INVALID_LKEY 0x100 +#define MLX5_TERMINATE_SCATTER_LIST_LKEY cpu_to_be32(0x100) /* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */ #define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8) #define MLX5_DIF_SIZE 8 -- cgit v1.2.3 From d88cbbb39b4db057feb1552de31f22c02a21b36f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 17 Feb 2023 10:29:10 +0100 Subject: blk-mq: Reorder fields in 'struct blk_mq_tag_set' Group some variables based on their sizes to reduce hole and avoid padding. On x86_64, this shrinks the size of 'struct blk_mq_tag_set' from 304 to 296 bytes. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6f249f9b02a3490283ef0278096556de41aa0cf0.1676626130.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 779fba613bd0..dd5ce1137f04 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -473,6 +473,7 @@ enum hctx_type { /** * struct blk_mq_tag_set - tag set that can be shared between request queues + * @ops: Pointers to functions that implement block driver behavior. * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the @@ -480,7 +481,6 @@ enum hctx_type { * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. - * @ops: Pointers to functions that implement block driver behavior. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. @@ -505,9 +505,9 @@ enum hctx_type { * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { + const struct blk_mq_ops *ops; struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; - const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; -- cgit v1.2.3 From 9cbad37ce8122de32a1529e394b468bc101c9e7f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 9 Feb 2023 15:35:01 -0600 Subject: of: Add of_property_present() helper Add an of_property_present() function similar to fwnode_property_present(). of_property_read_bool() could be used directly, but it is cleaner to not use it on non-boolean properties. Reviewed-by: Frank Rowand Tested-by: Frank Rowand Link: https://lore.kernel.org/all/20230215215547.691573-1-robh@kernel.org/ Signed-off-by: Rob Herring --- include/linux/of.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8bb348666709..37afd04f36eb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1155,7 +1155,8 @@ static inline int of_property_read_string_index(const struct device_node *np, * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * - * Search for a property in a device node. + * Search for a boolean property in a device node. Usage on non-boolean + * property types is deprecated. * * Return: true if the property exists false otherwise. */ @@ -1167,6 +1168,20 @@ static inline bool of_property_read_bool(const struct device_node *np, return prop ? true : false; } +/** + * of_property_present - Test if a property is present in a node + * @np: device node to search for the property. + * @propname: name of the property to be searched. + * + * Test for a property present in a device node. + * + * Return: true if the property exists false otherwise. + */ +static inline bool of_property_present(const struct device_node *np, const char *propname) +{ + return of_property_read_bool(np, propname); +} + /** * of_property_read_u8_array - Find and read an array of u8 from a property. * -- cgit v1.2.3 From ec4288fe63966b26d53907212ecd05dfa81dd2cc Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 15 Feb 2023 17:35:42 -0800 Subject: hugetlb: check for undefined shift on 32 bit architectures Users can specify the hugetlb page size in the mmap, shmget and memfd_create system calls. This is done by using 6 bits within the flags argument to encode the base-2 logarithm of the desired page size. The routine hstate_sizelog() uses the log2 value to find the corresponding hugetlb hstate structure. Converting the log2 value (page_size_log) to potential hugetlb page size is the simple statement: 1UL << page_size_log Because only 6 bits are used for page_size_log, the left shift can not be greater than 63. This is fine on 64 bit architectures where a long is 64 bits. However, if a value greater than 31 is passed on a 32 bit architecture (where long is 32 bits) the shift will result in undefined behavior. This was generally not an issue as the result of the undefined shift had to exactly match hugetlb page size to proceed. Recent improvements in runtime checking have resulted in this undefined behavior throwing errors such as reported below. Fix by comparing page_size_log to BITS_PER_LONG before doing shift. Link: https://lkml.kernel.org/r/20230216013542.138708-1-mike.kravetz@oracle.com Link: https://lore.kernel.org/lkml/CA+G9fYuei_Tr-vN9GS7SfFyU1y9hNysnf=PB7kT0=yv4MiPgVg@mail.gmail.com/ Fixes: 42d7395feb56 ("mm: support more pagesizes for MAP_HUGETLB/SHM_HUGETLB") Signed-off-by: Mike Kravetz Reported-by: Naresh Kamboju Reviewed-by: Jesper Juhl Acked-by: Muchun Song Tested-by: Linux Kernel Functional Testing Tested-by: Naresh Kamboju Cc: Anders Roxell Cc: Andi Kleen Cc: Sasha Levin Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index db194e2ba69f..9ab9d3105d5c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -743,7 +743,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log) if (!page_size_log) return &default_hstate; - return size_to_hstate(1UL << page_size_log); + if (page_size_log < BITS_PER_LONG) + return size_to_hstate(1UL << page_size_log); + + return NULL; } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) -- cgit v1.2.3 From 2455f0e124d317dd08d337a7550a78a224d4ba41 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 15 Feb 2023 15:33:45 -0700 Subject: tracing: Always use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many comments and Kconfig help messages in the tracing code still refer to this older debugfs path, so let's update them to avoid confusion. Link: https://lore.kernel.org/linux-trace-kernel/20230215223350.2658616-2-zwisler@google.com Acked-by: Masami Hiramatsu (Google) Reviewed-by: Mukesh Ojha Signed-off-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- include/linux/kernel.h | 2 +- include/linux/tracepoint.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fe6efb24d151..40bce7495af8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -297,7 +297,7 @@ bool mac_pton(const char *s, u8 *mac); * * Use tracing_on/tracing_off when you want to quickly turn on or off * tracing. It simply enables or disables the recording of the trace events. - * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on + * This also corresponds to the user space /sys/kernel/tracing/tracing_on * file, which gives a means for the kernel and userspace to interact. * Place a tracing_off() in the kernel where you want tracing to end. * From user space, examine the trace, and then echo 1 > tracing_on diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4b33b95eb8be..fa1004fcf810 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -471,7 +471,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * * This is how the trace record is structured and will * * be saved into the ring buffer. These are the fields * * that will be exposed to user-space in - * * /sys/kernel/debug/tracing/events/<*>/format. + * * /sys/kernel/tracing/events/<*>/format. * * * * The declared 'local variable' is called '__entry' * * @@ -531,7 +531,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * tracepoint callback (this is used by programmatic plugins and * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in - * /sys/kernel/debug/tracing/events/. + * /sys/kernel/tracing/events/. * * A set of (un)registration functions can be passed to the variant * TRACE_EVENT_FN to perform any (un)registration work. -- cgit v1.2.3 From 66fb1d5df6ace316a4a6e2c31e13fc123ea2b644 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Thu, 16 Feb 2023 11:13:45 +0200 Subject: IB/mlx5: Extend debug control for CC parameters This patch adds rtt_resp_dscp to the current debug controllability of congestion control (CC) parameters. rtt_resp_dscp can be read or written through debugfs. If set, its value overwrites the DSCP of the generated RTT response. Signed-off-by: Edward Srouji Reviewed-by: Maor Gottlieb Link: https://lore.kernel.org/r/1dcc3440ee53c688f19f579a051ded81a2aaa70a.1676538714.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 67cfac8fbe46..c63b92aa4c96 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2159,6 +2159,17 @@ struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { u8 reserved_at_360[0x4a0]; }; +struct mlx5_ifc_cong_control_r_roce_general_bits { + u8 reserved_at_0[0x80]; + + u8 reserved_at_80[0x10]; + u8 rtt_resp_dscp_valid[0x1]; + u8 reserved_at_91[0x9]; + u8 rtt_resp_dscp[0x6]; + + u8 reserved_at_a0[0x760]; +}; + struct mlx5_ifc_cong_control_802_1qau_rp_bits { u8 reserved_at_0[0x80]; @@ -4304,6 +4315,7 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits { struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; + struct mlx5_ifc_cong_control_r_roce_general_bits cong_control_r_roce_general; u8 reserved_at_0[0x800]; }; -- cgit v1.2.3 From 50bcfe8df7c73ce51762f65d218b4ef0cc5da3ee Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 17 Feb 2023 13:28:49 +0100 Subject: net: make default_rps_mask a per netns attribute That really was meant to be a per netns attribute from the beginning. The idea is that once proper isolation is in place in the main namespace, additional demux in the child namespaces will be redundant. Let's make child netns default rps mask empty by default. To avoid bloating the netns with a possibly large cpumask, allocate it on-demand during the first write operation. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index efbee940bb03..6a14b7b11766 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -224,7 +224,6 @@ struct net_device_core_stats { #include extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; -extern struct cpumask rps_default_mask; #endif struct neighbour; -- cgit v1.2.3 From e1f19857f94be09f9526f180e64f20138bd4e394 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 7 Dec 2022 09:43:08 +0100 Subject: fs: namei: Allow follow_down() to uncover auto mounts This function is only used by NFSD to cross mount points. If a mount point is of type auto mount, follow_down() will not uncover it. Add LOOKUP_AUTOMOUNT to the lookup flags to have ->d_automount() called when NFSD walks down the mount tree. Signed-off-by: Richard Weinberger Reviewed-by: Ian Kent Reviewed-by: Jeff Layton Acked-by: Al Viro Signed-off-by: Chuck Lever --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 00fee52df842..c82e2ac65846 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -77,7 +77,7 @@ struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, struct dentry *base, int len); extern int follow_down_one(struct path *); -extern int follow_down(struct path *); +extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3 From 846b5756d7632523b5bfce78c163aa883aa9d587 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:05:43 -0500 Subject: SUNRPC: Add an XDR decoding helper for struct opaque_auth RFC 5531 defines the body of an RPC Call message like this: struct call_body { unsigned int rpcvers; unsigned int prog; unsigned int vers; unsigned int proc; opaque_auth cred; opaque_auth verf; /* procedure-specific parameters start here */ }; In the current server code, decoding a struct opaque_auth type is open-coded in several places, and is thus difficult to harden everywhere. Introduce a helper for decoding an opaque_auth within the context of a xdr_stream. This helper can be shared with all authentication flavor implemenations, even on the client-side. Done as part of hardening the server-side RPC header decoding paths. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f84e2a1358e1..8b5c9d0cdcb5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -346,6 +346,9 @@ ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); +ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, + void **body, unsigned int *body_len); + /** * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) -- cgit v1.2.3 From 6181b0c6432bf0807512e85e0c5863f7aca8e515 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:05:56 -0500 Subject: SUNRPC: Convert svcauth_unix_accept() to use xdr_stream Done as part of hardening the server-side RPC header decoding path. Since the server-side of the Linux kernel SunRPC implementation ignores the contents of the Call's machinename field, there's no need for its RPC_AUTH_UNIX authenticator to reject names that are larger than UNX_MAXNODENAME. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/msg_prot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 02117ed0fa2e..c4b0eb2b2f04 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -34,6 +34,11 @@ enum rpc_auth_flavors { RPC_AUTH_GSS_SPKMP = 390011, }; +/* Maximum size (in octets) of the machinename in an AUTH_UNIX + * credential (per RFC 5531 Appendix A) + */ +#define RPC_MAX_MACHINENAME (255) + /* Maximum size (in bytes) of an rpc credential or verifier */ #define RPC_MAX_AUTH_SIZE (400) -- cgit v1.2.3 From b68e4c5c32275e23d35badb7287faaa310c15ba0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:06:54 -0500 Subject: SUNRPC: Convert unwrap_integ_data() to use xdr_stream Done as part of hardening the server-side RPC header decoding path. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8b5c9d0cdcb5..accfe8d6e283 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -247,6 +247,7 @@ extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); -- cgit v1.2.3 From 42140718ea26c47fb06c679451a6aa6703545136 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:07:07 -0500 Subject: SUNRPC: Convert unwrap_priv_data() to use xdr_stream Done as part of hardening the server-side RPC header decoding path. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index accfe8d6e283..884df67009f4 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -188,7 +188,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) /* * XDR buffer helper functions */ -extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); -- cgit v1.2.3 From df24ac7a2e3a9d0bc68f1756a880e50bfe4b4522 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Sun, 18 Dec 2022 16:55:53 -0800 Subject: NFSD: enhance inter-server copy cleanup Currently nfsd4_setup_inter_ssc returns the vfsmount of the source server's export when the mount completes. After the copy is done nfsd4_cleanup_inter_ssc is called with the vfsmount of the source server and it searches nfsd_ssc_mount_list for a matching entry to do the clean up. The problems with this approach are (1) the need to search the nfsd_ssc_mount_list and (2) the code has to handle the case where the matching entry is not found which looks ugly. The enhancement is instead of nfsd4_setup_inter_ssc returning the vfsmount, it returns the nfsd4_ssc_umount_item which has the vfsmount embedded in it. When nfsd4_cleanup_inter_ssc is called it's passed with the nfsd4_ssc_umount_item directly to do the clean up so no searching is needed and there is no need to handle the 'not found' case. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever [ cel: adjusted whitespace and variable/function names ] Reviewed-by: Olga Kornievskaia --- include/linux/nfs_ssc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 75843c00f326..22265b1ff080 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -53,6 +53,7 @@ static inline void nfs42_ssc_close(struct file *filep) if (nfs_ssc_client_tbl.ssc_nfs4_ops) (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep); } +#endif struct nfsd4_ssc_umount_item { struct list_head nsui_list; @@ -66,7 +67,6 @@ struct nfsd4_ssc_umount_item { struct vfsmount *nsui_vfsmount; char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; }; -#endif /* * NFS_FS -- cgit v1.2.3 From 7b402c8db66414abb4001d0c2676553baa619a2b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:29:38 -0500 Subject: SUNRPC: Add XDR encoding helper for opaque_auth RFC 5531 defines an MSG_ACCEPTED Reply message like this: struct accepted_reply { opaque_auth verf; union switch (accept_stat stat) { case SUCCESS: ... In the current server code, struct opaque_auth encoding is open- coded. Introduce a helper that encodes an opaque_auth data item within the context of a xdr_stream. Done as part of hardening the server-side RPC header decoding and encoding paths. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 884df67009f4..f3b6eb9accd7 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -348,6 +348,8 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, void **body, unsigned int *body_len); +ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor, + void *body, unsigned int body_len); /** * xdr_align_size - Calculate padded size of an object -- cgit v1.2.3 From 8dd41d70f331c342842e8d349d7a1f73b0ba7ccd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:29:44 -0500 Subject: SUNRPC: Push svcxdr_init_encode() into svc_process_common() Now that all vs_dispatch functions invoke svcxdr_init_encode(), it is common code and can be pushed down into the generic RPC server. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f3b6eb9accd7..72014c9216fc 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -474,6 +474,27 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) return len; } +/** + * xdr_stream_encode_be32 - Encode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = n; + return len; +} + /** * xdr_stream_encode_u64 - Encode a 64-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 7bb0dfb2234725ba085cacfd35f34c187def92b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:30:15 -0500 Subject: SUNRPC: Convert unwrap data paths to use xdr_stream for replies We're now moving svcxdr_init_encode() to /before/ the flavor's ->accept method has set rq_auth_slack. Add a helper that can set rq_auth_slack /after/ svcxdr_init_encode() has been called. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ed722dd33b46..dd9f68acd9f1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -577,12 +577,34 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->buf = buf; xdr->iov = resv; xdr->p = resv->iov_base + resv->iov_len; - xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + xdr->end = resv->iov_base + PAGE_SIZE; buf->len = resv->iov_len; xdr->page_ptr = buf->pages - 1; buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); - buf->buflen -= rqstp->rq_auth_slack; xdr->rqst = NULL; } +/** + * svcxdr_set_auth_slack - + * @rqstp: RPC transaction + * @slack: buffer space to reserve for the transaction's security flavor + * + * Set the request's slack space requirement, and set aside that much + * space in the rqstp's rq_res.head for use when the auth wraps the Reply. + */ +static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + rqstp->rq_auth_slack = slack; + + xdr->end -= XDR_QUADLEN(slack); + buf->buflen -= rqstp->rq_auth_slack; + + WARN_ON(xdr->iov != resv); + WARN_ON(xdr->p > xdr->end); +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From 5df25676de2e13b2cb67140fd43bcbfdd60ef0df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:30:52 -0500 Subject: SUNRPC: Remove no-longer-used helper functions The svc_get/put helpers are no longer used. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 66 +--------------------------------------------- 1 file changed, 1 insertion(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dd9f68acd9f1..32eb98e621c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -193,40 +193,6 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + 2 + 1) -static inline u32 svc_getnl(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return ntohl(val); -} - -static inline void svc_putnl(struct kvec *iov, u32 val) -{ - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = htonl(val); - iov->iov_len += sizeof(__be32); -} - -static inline __be32 svc_getu32(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return val; -} - -static inline void svc_putu32(struct kvec *iov, __be32 val) -{ - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = val; - iov->iov_len += sizeof(__be32); -} - /* * The context of a single thread, including the request currently being * processed. @@ -345,29 +311,6 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } -/* - * Check buffer bounds after decoding arguments - */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) -{ - char *cp = (char *)p; - struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp >= (char*)vec->iov_base - && cp <= (char*)vec->iov_base + vec->iov_len; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) -{ - struct kvec *vec = &rqstp->rq_res.head[0]; - char *cp = (char*)p; - - vec->iov_len = cp - (char*)vec->iov_base; - - return vec->iov_len <= PAGE_SIZE; -} - static inline void svc_free_res_pages(struct svc_rqst *rqstp) { while (rqstp->rq_next_page != rqstp->rq_respages) { @@ -540,9 +483,6 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) * svcxdr_init_decode - Prepare an xdr_stream for Call decoding * @rqstp: controlling server RPC transaction context * - * This function currently assumes the RPC header in rq_arg has - * already been decoded. Upon return, xdr->p points to the - * location of the upper layer header. */ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) { @@ -550,11 +490,7 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) struct xdr_buf *buf = &rqstp->rq_arg; struct kvec *argv = buf->head; - /* - * svc_getnl() and friends do not keep the xdr_buf's ::len - * field up to date. Refresh that field before initializing - * the argument decoding stream. - */ + WARN_ON(buf->len != buf->head->iov_len + buf->page_len + buf->tail->iov_len); buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; xdr_init_decode(xdr, buf, argv->iov_base, NULL); -- cgit v1.2.3 From cee4db19452467eef8ab93c6eb6a3a84d11d25d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:30:59 -0500 Subject: SUNRPC: Refactor RPC server dispatch method Currently, svcauth_gss_accept() pre-reserves response buffer space for the RPC payload length and GSS sequence number before returning to the dispatcher, which then adds the header's accept_stat field. The problem is the accept_stat field is supposed to go before the length and seq_num fields. So svcauth_gss_release() has to relocate the accept_stat value (see svcauth_gss_prepare_to_wrap()). To enable these fields to be added to the response buffer in the correct (final) order, the pointer to the accept_stat has to be made available to svcauth_gss_accept() so that it can set it before reserving space for the length and seq_num fields. As a first step, move the pointer to the location of the accept_stat field into struct svc_rqst. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 32eb98e621c3..f40a90ca5de6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -251,6 +251,7 @@ struct svc_rqst { void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ + __be32 *rq_accept_statp; void * rq_auth_data; /* flavor-specific data */ __be32 rq_auth_stat; /* authentication status */ int rq_auth_slack; /* extra space xdr code @@ -337,7 +338,7 @@ struct svc_deferred_req { struct svc_process_info { union { - int (*dispatch)(struct svc_rqst *, __be32 *); + int (*dispatch)(struct svc_rqst *rqstp); struct { unsigned int lovers; unsigned int hivers; @@ -389,7 +390,7 @@ struct svc_version { bool vs_need_cong_ctrl; /* Dispatch function */ - int (*vs_dispatch)(struct svc_rqst *, __be32 *); + int (*vs_dispatch)(struct svc_rqst *rqstp); }; /* -- cgit v1.2.3 From 4bcf0343e8a69eb22f7e83bfa7cfce32a28c9d95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:31:05 -0500 Subject: SUNRPC: Set rq_accept_statp inside ->accept methods To navigate around the space that svcauth_gss_accept() reserves for the RPC payload body length and sequence number fields, svcauth_gss_release() does a little dance with the reply's accept_stat, moving the accept_stat value in the response buffer down by two words. Instead, let's have the ->accept() methods each set the proper final location of the accept_stat to avoid having to move things. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f40a90ca5de6..392d2d2620fa 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -544,4 +544,23 @@ static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack) WARN_ON(xdr->p > xdr->end); } +/** + * svcxdr_set_accept_stat - Reserve space for the accept_stat field + * @rqstp: RPC transaction context + * + * Return values: + * %true: Success + * %false: No response buffer space was available + */ +static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + + rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT); + if (unlikely(!rqstp->rq_accept_statp)) + return false; + *rqstp->rq_accept_statp = rpc_success; + return true; +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From 65ba3d2425bf51165b6e88509c632bd15d12883d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Jan 2023 10:31:54 -0500 Subject: SUNRPC: Use per-CPU counters to tally server RPC counts - Improves counting accuracy - Reduces cross-CPU memory traffic Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 4 ++-- include/linux/sunrpc/svc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 70ce419e2709..84de6b7c9948 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -196,9 +196,9 @@ struct nlm_block { * Global variables */ extern const struct rpc_program nlm_program; -extern const struct svc_procedure nlmsvc_procedures[]; +extern const struct svc_procedure nlmsvc_procedures[24]; #ifdef CONFIG_LOCKD_V4 -extern const struct svc_procedure nlmsvc_procedures4[]; +extern const struct svc_procedure nlmsvc_procedures4[24]; #endif extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 392d2d2620fa..1b078c9a2d60 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -377,7 +377,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ const struct svc_procedure *vs_proc; /* per-procedure info */ - unsigned int *vs_count; /* call counts */ + unsigned long __percpu *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ -- cgit v1.2.3 From ccf08bed6e7a80519569456edd2ea21b7b1701c6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Jan 2023 10:32:00 -0500 Subject: SUNRPC: Replace pool stats with per-CPU variables Eliminate the use of bus-locked operations in svc_xprt_enqueue(), which is a hot path. Replace them with per-cpu variables to reduce cross-CPU memory bus traffic. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1b078c9a2d60..877891536c2f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -21,14 +21,6 @@ #include #include -/* statistics for svc_pool structures */ -struct svc_pool_stats { - atomic_long_t packets; - unsigned long sockets_queued; - atomic_long_t threads_woken; - atomic_long_t threads_timedout; -}; - /* * * RPC service thread pool. @@ -45,7 +37,12 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ - struct svc_pool_stats sp_stats; /* statistics on pool operation */ + + /* statistics on pool operation */ + struct percpu_counter sp_sockets_queued; + struct percpu_counter sp_threads_woken; + struct percpu_counter sp_threads_timedout; + #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ #define SP_CONGESTED (1) -- cgit v1.2.3 From 97648b94bd9dbb987fe7e4067deecdb47d4fd7e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:22 -0500 Subject: SUNRPC: Add header ifdefs to linux/sunrpc/gss_krb5.h Standard convention: Ensure the contents of the header are included only once per source file. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 91f43d86879d..0135139ddf20 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -1,6 +1,4 @@ /* - * linux/include/linux/sunrpc/gss_krb5_types.h - * * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, * lib/gssapi/krb5/gssapiP_krb5.h, and others * @@ -36,6 +34,9 @@ * */ +#ifndef _LINUX_SUNRPC_GSS_KRB5_H +#define _LINUX_SUNRPC_GSS_KRB5_H + #include #include #include @@ -316,3 +317,5 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, void gss_krb5_make_confounder(char *p, u32 conflen); + +#endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From f03640a1a9782f4bf7c1db63e2e6a9598c6d2c6e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:28 -0500 Subject: SUNRPC: Remove .blocksize field from struct gss_krb5_enctype It is not clear from documenting comments, specifications, or code usage what value the gss_krb5_enctype.blocksize field is supposed to store. The "encryption blocksize" depends only on the cipher being used, so that value can be derived where it's needed instead of stored as a constant. RFC 3961 Section 5.2 says: > cipher block size, c > This is the block size of the block cipher underlying the > encryption and decryption functions indicated above, used for key > derivation and for the size of the message confounder and initial > vector. (If a block cipher is not in use, some comparable > parameter should be determined.) It must be at least 5 octets. > > This is not actually an independent parameter; rather, it is a > property of the functions E and D. It is listed here to clarify > the distinction between it and the message block size, m. In the Linux kernel's implemenation of the SunRPC RPCSEC GSS Kerberos 5 mechanism, the cipher block size, which is dependent on the encryption and decryption transforms, is used only in krb5_derive_key(), so it is straightforward to replace it. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 0135139ddf20..9a833825b55b 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -64,7 +64,6 @@ struct gss_krb5_enctype { const char *cksum_name; /* crypto checksum name */ const u16 signalg; /* signing algorithm */ const u16 sealalg; /* sealing algorithm */ - const u32 blocksize; /* encryption blocksize */ const u32 conflen; /* confounder length (normally the same as the blocksize) */ -- cgit v1.2.3 From 4be416a5f2803d421c950cc48e8e0c1eaaa8c773 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:35 -0500 Subject: SUNRPC: Remove .conflen field from struct gss_krb5_enctype Now that arcfour-hmac is gone, the confounder length is again the same as the cipher blocksize for every implemented enctype. The gss_krb5_enctype::conflen field is no longer necessary. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 9a833825b55b..51860e3a0216 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -64,9 +64,6 @@ struct gss_krb5_enctype { const char *cksum_name; /* crypto checksum name */ const u16 signalg; /* signing algorithm */ const u16 sealalg; /* sealing algorithm */ - const u32 conflen; /* confounder length - (normally the same as - the blocksize) */ const u32 cksumlength; /* checksum length */ const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ -- cgit v1.2.3 From 7f675ca7757bfeb70e19d187dc3be44deb836da8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:41 -0500 Subject: SUNRPC: Improve Kerberos confounder generation Other common Kerberos implementations use a fully random confounder for encryption. The reason for this is explained in the new comment added by this patch. The current get_random_bytes() implementation does not exhaust system entropy. Since confounder generation is part of Kerberos itself rather than the GSS-API Kerberos mechanism, the function is renamed and moved. Note that light top-down analysis shows that the SHA-1 transform is by far the most CPU-intensive part of encryption. Thus we do not expect this change to result in a significant performance impact. However, eventually it might be necessary to generate an independent stream of confounders for each Kerberos context to help improve I/O parallelism. Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 51860e3a0216..f0fac1e69731 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -311,7 +311,4 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen); -void -gss_krb5_make_confounder(char *p, u32 conflen); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From 7989a4f4ab5437ec82b0b59984594f848f12b36a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:54 -0500 Subject: SUNRPC: Refactor set-up for aux_cipher Hoist the name of the aux_cipher into struct gss_krb5_enctype to prepare for obscuring the encryption keys just after they are derived. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index f0fac1e69731..34d54714c6a3 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -61,6 +61,7 @@ struct gss_krb5_enctype { const u32 ctype; /* checksum type */ const char *name; /* "friendly" name */ const char *encrypt_name; /* crypto encrypt name */ + const char *aux_cipher; /* aux encrypt cipher name */ const char *cksum_name; /* crypto checksum name */ const u16 signalg; /* signing algorithm */ const u16 sealalg; /* sealing algorithm */ -- cgit v1.2.3 From 9f0b49f933ab1ec5e7140a43eec72b0c5181cabf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:01 -0500 Subject: SUNRPC: Obscure Kerberos encryption keys The encryption subkeys are not used after the cipher transforms have been allocated and keyed. There is no need to retain them in struct krb5_ctx. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 34d54714c6a3..46eaa2ee9c21 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -110,8 +110,6 @@ struct krb5_ctx { struct xdr_netobj mech_used; u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; - u8 initiator_seal[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_seal[GSS_KRB5_MAX_KEYLEN]; u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -- cgit v1.2.3 From 2dbe0cac3cd6d747579b0b347145326eddfd4e5c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:07 -0500 Subject: SUNRPC: Obscure Kerberos signing keys There's no need to keep the signing keys around if we instead allocate and key an ahash and keep that. This not only enables the subkeys to be destroyed immediately after deriving them, but it makes the Kerberos signing code path more efficient. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 46eaa2ee9c21..9d897f1ac85a 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -102,14 +102,14 @@ struct krb5_ctx { struct crypto_sync_skcipher *initiator_enc; struct crypto_sync_skcipher *acceptor_enc_aux; struct crypto_sync_skcipher *initiator_enc_aux; + struct crypto_ahash *acceptor_sign; + struct crypto_ahash *initiator_sign; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; atomic_t seq_send; atomic64_t seq_send64; time64_t endtime; struct xdr_netobj mech_used; - u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; @@ -252,7 +252,6 @@ u32 gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, struct xdr_buf *buf); - u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); -- cgit v1.2.3 From 8270dbfcebea5b68037a84ad1710e2cfa499b82f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:13 -0500 Subject: SUNRPC: Obscure Kerberos integrity keys There's no need to keep the integrity keys around if we instead allocate and key a pair of ahashes and keep those. This not only enables the subkeys to be destroyed immediately after deriving them, but it makes the Kerberos integrity code path more efficient. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 9d897f1ac85a..85e65232bb61 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -104,14 +104,14 @@ struct krb5_ctx { struct crypto_sync_skcipher *initiator_enc_aux; struct crypto_ahash *acceptor_sign; struct crypto_ahash *initiator_sign; + struct crypto_ahash *initiator_integ; + struct crypto_ahash *acceptor_integ; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; atomic_t seq_send; atomic64_t seq_send64; time64_t endtime; struct xdr_netobj mech_used; - u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; /* The length of the Kerberos GSS token header */ @@ -233,11 +233,6 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout); -u32 -make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *key, - unsigned int usage, struct xdr_netobj *cksum); - u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, struct xdr_netobj *); -- cgit v1.2.3 From e01b2c79f4af1298b961116aba3e64367fe73286 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:20 -0500 Subject: SUNRPC: Refactor the GSS-API Per Message calls in the Kerberos mechanism Replace a number of switches on encryption type so that all of them don't have to be modified when adding or removing support for an enctype. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 85e65232bb61..f1201478fdd5 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -84,6 +84,15 @@ struct gss_krb5_enctype { u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *headskip, u32 *tailskip); /* v2 decryption function */ + u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text, + struct xdr_netobj *token); + u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer, + struct xdr_netobj *read_token); + u32 (*wrap)(struct krb5_ctx *kctx, int offset, + struct xdr_buf *buf, struct page **pages); + u32 (*unwrap)(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align); }; /* krb5_ctx flags definitions */ @@ -233,20 +242,6 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout); -u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 -gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, - struct xdr_buf *outbuf, struct page **pages); - -u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, - struct xdr_buf *buf); - u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); -- cgit v1.2.3 From 279a67cdd491a53028eb0b52508383098c6d992b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:26 -0500 Subject: SUNRPC: Remove another switch on ctx->enctype Replace another switch on encryption type so that it does not have to be modified when adding or removing support for an enctype. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index f1201478fdd5..68ae0c3d4cf7 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -75,6 +75,7 @@ struct gss_krb5_enctype { u32 (*decrypt) (struct crypto_sync_skcipher *tfm, void *iv, void *in, void *out, int length); /* decryption function */ + int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, struct xdr_netobj *in, struct xdr_netobj *out); /* complete key generation */ -- cgit v1.2.3 From 17781b2ce41a8915163d7cdada021f809ccd49f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:45 -0500 Subject: SUNRPC: Replace KRB5_SUPPORTED_ENCTYPES macro Now that all consumers of the KRB5_SUPPORTED_ENCTYPES macro are within the SunRPC layer, the macro can be replaced with something private and more flexible. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5_enctypes.h | 41 -------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h deleted file mode 100644 index 87eea679d750..000000000000 --- a/include/linux/sunrpc/gss_krb5_enctypes.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Define the string that exports the set of kernel-supported - * Kerberos enctypes. This list is sent via upcall to gssd, and - * is also exposed via the nfsd /proc API. The consumers generally - * treat this as an ordered list, where the first item in the list - * is the most preferred. - */ - -#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H -#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H - -#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES - -/* - * NB: This list includes DES3_CBC_SHA1, which was deprecated by RFC 8429. - * - * ENCTYPE_AES256_CTS_HMAC_SHA1_96 - * ENCTYPE_AES128_CTS_HMAC_SHA1_96 - * ENCTYPE_DES3_CBC_SHA1 - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16" - -#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ - -/* - * NB: This list includes encryption types that were deprecated - * by RFC 8429 and RFC 6649. - * - * ENCTYPE_AES256_CTS_HMAC_SHA1_96 - * ENCTYPE_AES128_CTS_HMAC_SHA1_96 - * ENCTYPE_DES3_CBC_SHA1 - * ENCTYPE_DES_CBC_MD5 - * ENCTYPE_DES_CBC_CRC - * ENCTYPE_DES_CBC_MD4 - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16,3,1,2" - -#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ - -#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */ -- cgit v1.2.3 From d50b8152c992ac88c5f1f0cc8ade6ee0aa0a3704 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:58 -0500 Subject: SUNRPC: Remove ->encrypt and ->decrypt methods from struct gss_krb5_enctype Clean up: ->encrypt is set to only one value. Replace the two remaining call sites with direct calls to krb5_encrypt(). There have never been any call sites for the ->decrypt() method. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 68ae0c3d4cf7..a0646df12beb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -69,12 +69,6 @@ struct gss_krb5_enctype { const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_sync_skcipher *tfm, - void *iv, void *in, void *out, - int length); /* encryption function */ - u32 (*decrypt) (struct crypto_sync_skcipher *tfm, - void *iv, void *in, void *out, - int length); /* decryption function */ int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, struct xdr_netobj *in, @@ -243,14 +237,6 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout); -u32 -krb5_encrypt(struct crypto_sync_skcipher *key, - void *iv, void *in, void *out, int length); - -u32 -krb5_decrypt(struct crypto_sync_skcipher *key, - void *iv, void *in, void *out, int length); - int gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, int offset, struct page **pages); -- cgit v1.2.3 From ae6ad5d0b7901b301234143e93624417ac9fd9ef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:04 -0500 Subject: SUNRPC: Rename .encrypt_v2 and .decrypt_v2 methods Clean up: there is now only one encrypt and only one decrypt method, thus there is no longer a need for the v2-suffixed method names. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index a0646df12beb..85fce36c242d 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -73,12 +73,10 @@ struct gss_krb5_enctype { u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, struct xdr_netobj *in, struct xdr_netobj *out); /* complete key generation */ - u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, - struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *headskip, - u32 *tailskip); /* v2 decryption function */ + u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, struct page **pages); + u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len, + struct xdr_buf *buf, u32 *headskip, u32 *tailskip); u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text, struct xdr_netobj *token); u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer, @@ -276,14 +274,4 @@ gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, struct xdr_netobj *randombits, struct xdr_netobj *key); -u32 -gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, - struct page **pages); - -u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *plainoffset, - u32 *plainlen); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From 2691a27d9b3e6a48adeb87a9dcf4e8a0ca84a26e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:11 -0500 Subject: SUNRPC: Hoist KDF into struct gss_krb5_enctype Each Kerberos enctype can have a different KDF. Refactor the key derivation path to support different KDFs for the enctypes introduced in subsequent patches. In particular, expose the key derivation function in struct gss_krb5_enctype instead of the enctype's preferred random-to-key function. The latter is usually the identity function and is only ever called during key derivation, so have each KDF call it directly. A couple of extra clean-ups: - Deduplicate the set_cdata() helper - Have ->derive_key return negative errnos, in accordance with usual kernel coding conventions This patch is a little bigger than I'd like, but these are all mechanical changes and they are all to the same areas of code. No behavior change is intended. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 85fce36c242d..04addef79177 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -70,9 +70,11 @@ struct gss_krb5_enctype { const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); - u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *in, - struct xdr_netobj *out); /* complete key generation */ + int (*derive_key)(const struct gss_krb5_enctype *gk5e, + const struct xdr_netobj *in, + struct xdr_netobj *out, + const struct xdr_netobj *label, + gfp_t gfp_mask); u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages); u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len, @@ -257,21 +259,4 @@ krb5_get_seq_num(struct krb5_ctx *kctx, int xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); -u32 -krb5_derive_key(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant, - gfp_t gfp_mask); - -u32 -gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - -u32 -gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From af664fc9023e69d1a90800c0f815c296bf727e18 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:30 -0500 Subject: SUNRPC: Add new subkey length fields The aes256-cts-hmac-sha384-192 enctype specifies the length of its checksum and integrity subkeys as 192 bits, but the length of its encryption subkey (Ke) as 256 bits. Add new fields to struct gss_krb5_enctype that specify the key lengths individually, and where needed, use the correct new field instead of ->keylength. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 04addef79177..3e97d2a7c87d 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -42,10 +42,16 @@ #include #include +/* + * The RFCs often specify payload lengths in bits. This helper + * converts a specified bit-length to the number of octets/bytes. + */ +#define BITS2OCTETS(x) ((x) / 8) + /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) -/* Maximum key length (in bytes) for the supported crypto algorithms*/ +/* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) /* Maximum checksum function output for the supported crypto algorithms */ @@ -68,7 +74,11 @@ struct gss_krb5_enctype { const u32 cksumlength; /* checksum length */ const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ - const u32 keylength; /* final key len, in bytes */ + const u32 keylength; /* protocol key length, in octets */ + const u32 Kc_length; /* checksum subkey length, in octets */ + const u32 Ke_length; /* encryption subkey length, in octets */ + const u32 Ki_length; /* integrity subkey length, in octets */ + int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); int (*derive_key)(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *in, -- cgit v1.2.3 From a40cf7530d3104793f9361e69e84ada7960724f2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:43 -0500 Subject: SUNRPC: Add gk5e definitions for RFC 8009 encryption types Fill in entries in the supported_gss_krb5_enctypes array for the encryption types defined in RFC 8009. These new enctypes use the SHA-256 and SHA-384 message digest algorithms (as defined in FIPS-180) instead of the deprecated SHA-1 algorithm, and are thus more secure. Note that NIST has scheduled SHA-1 for deprecation: https://www.nist.gov/news-events/news/2022/12/nist-retires-sha-1-cryptographic-algorithm Thus these new encryption types are placed under a separate CONFIG option to enable distributors to separately introduce support for the AES-SHA2 enctypes and deprecate support for the current set of AES-SHA1 encryption types as their user space allows. As this implementation is still a "beta", the default is to not build it automatically. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 3e97d2a7c87d..8ff397b5c04b 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -54,8 +54,8 @@ /* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) -/* Maximum checksum function output for the supported crypto algorithms */ -#define GSS_KRB5_MAX_CKSUM_LEN (20) +/* Maximum checksum function output for the supported enctypes */ +#define GSS_KRB5_MAX_CKSUM_LEN (24) /* Maximum blocksize for the supported crypto algorithms */ #define GSS_KRB5_MAX_BLOCKSIZE (16) @@ -160,6 +160,12 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; +/* + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-2 + */ #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 #define CKSUMTYPE_RSA_MD4_DES 0x0003 @@ -170,6 +176,8 @@ enum seal_alg { #define CKSUMTYPE_HMAC_SHA1_DES3 0x000c #define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f #define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 +#define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 #define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ /* from gssapi_err_krb5.h */ @@ -190,6 +198,11 @@ enum seal_alg { /* per Kerberos v5 protocol spec crypto types from the wire. * these get mapped to linux kernel crypto routines. + * + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-1 */ #define ENCTYPE_NULL 0x0000 #define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ @@ -203,6 +216,8 @@ enum seal_alg { #define ENCTYPE_DES3_CBC_SHA1 0x0010 #define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 #define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 +#define ENCTYPE_AES128_CTS_HMAC_SHA256_128 0x0013 +#define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 #define ENCTYPE_ARCFOUR_HMAC 0x0017 #define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 #define ENCTYPE_UNKNOWN 0x01ff -- cgit v1.2.3 From 3394682fba3b9010c6147e94f37633f044876e5e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:23:08 -0500 Subject: SUNRPC: Support the Camellia enctypes RFC 6803 defines two encryption types that use Camellia ciphers (RFC 3713) and CMAC digests. Implement support for those in SunRPC's GSS Kerberos 5 mechanism. There has not been an explicit request to support these enctypes. However, this new set of enctypes provides a good alternative to the AES-SHA1 enctypes that are to be deprecated at some point. As this implementation is still a "beta", the default is to not build it automatically. Tested-by: Scott Mayhew Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 8ff397b5c04b..cbb6c8192890 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -176,6 +176,8 @@ enum seal_alg { #define CKSUMTYPE_HMAC_SHA1_DES3 0x000c #define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f #define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_CMAC_CAMELLIA128 0x0011 +#define CKSUMTYPE_CMAC_CAMELLIA256 0x0012 #define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 #define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 #define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ @@ -220,6 +222,8 @@ enum seal_alg { #define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 #define ENCTYPE_ARCFOUR_HMAC 0x0017 #define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 +#define ENCTYPE_CAMELLIA128_CTS_CMAC 0x0019 +#define ENCTYPE_CAMELLIA256_CTS_CMAC 0x001A #define ENCTYPE_UNKNOWN 0x01ff /* -- cgit v1.2.3 From 6e460c230d2dfb0e5a02b6e0995546bb4b9d208e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:23:27 -0500 Subject: SUNRPC: Move remaining internal definitions to gss_krb5_internal.h The goal is to leave only protocol-defined items in gss_krb5.h so that it can be easily replaced by a generic header. Implementation specific items are moved to the new internal header. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 117 ---------------------------------------- 1 file changed, 117 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index cbb6c8192890..78a80bf3fdcb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -42,12 +42,6 @@ #include #include -/* - * The RFCs often specify payload lengths in bits. This helper - * converts a specified bit-length to the number of octets/bytes. - */ -#define BITS2OCTETS(x) ((x) / 8) - /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) @@ -60,74 +54,6 @@ /* Maximum blocksize for the supported crypto algorithms */ #define GSS_KRB5_MAX_BLOCKSIZE (16) -struct krb5_ctx; - -struct gss_krb5_enctype { - const u32 etype; /* encryption (key) type */ - const u32 ctype; /* checksum type */ - const char *name; /* "friendly" name */ - const char *encrypt_name; /* crypto encrypt name */ - const char *aux_cipher; /* aux encrypt cipher name */ - const char *cksum_name; /* crypto checksum name */ - const u16 signalg; /* signing algorithm */ - const u16 sealalg; /* sealing algorithm */ - const u32 cksumlength; /* checksum length */ - const u32 keyed_cksum; /* is it a keyed cksum? */ - const u32 keybytes; /* raw key len, in bytes */ - const u32 keylength; /* protocol key length, in octets */ - const u32 Kc_length; /* checksum subkey length, in octets */ - const u32 Ke_length; /* encryption subkey length, in octets */ - const u32 Ki_length; /* integrity subkey length, in octets */ - - int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); - int (*derive_key)(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *in, - struct xdr_netobj *out, - const struct xdr_netobj *label, - gfp_t gfp_mask); - u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, struct page **pages); - u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *headskip, u32 *tailskip); - u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text, - struct xdr_netobj *token); - u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer, - struct xdr_netobj *read_token); - u32 (*wrap)(struct krb5_ctx *kctx, int offset, - struct xdr_buf *buf, struct page **pages); - u32 (*unwrap)(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf, unsigned int *slack, - unsigned int *align); -}; - -/* krb5_ctx flags definitions */ -#define KRB5_CTX_FLAG_INITIATOR 0x00000001 -#define KRB5_CTX_FLAG_CFX 0x00000002 -#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 - -struct krb5_ctx { - int initiate; /* 1 = initiating, 0 = accepting */ - u32 enctype; - u32 flags; - const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_sync_skcipher *enc; - struct crypto_sync_skcipher *seq; - struct crypto_sync_skcipher *acceptor_enc; - struct crypto_sync_skcipher *initiator_enc; - struct crypto_sync_skcipher *acceptor_enc_aux; - struct crypto_sync_skcipher *initiator_enc_aux; - struct crypto_ahash *acceptor_sign; - struct crypto_ahash *initiator_sign; - struct crypto_ahash *initiator_integ; - struct crypto_ahash *acceptor_integ; - u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ - u8 cksum[GSS_KRB5_MAX_KEYLEN]; - atomic_t seq_send; - atomic64_t seq_send64; - time64_t endtime; - struct xdr_netobj mech_used; -}; - /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) @@ -245,47 +171,4 @@ enum seal_alg { #define KG_USAGE_INITIATOR_SEAL (24) #define KG_USAGE_INITIATOR_SIGN (25) -/* - * This compile-time check verifies that we will not exceed the - * slack space allotted by the client and server auth_gss code - * before they call gss_wrap(). - */ -#define GSS_KRB5_MAX_SLACK_NEEDED \ - (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ - + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ - + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */\ - + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ - + 4 + 4 /* RPC verifier */ \ - + GSS_KRB5_TOK_HDR_LEN \ - + GSS_KRB5_MAX_CKSUM_LEN) - -u32 -make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *cksumkey, - unsigned int usage, struct xdr_netobj *cksumout); - -int -gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, - int offset, struct page **pages); - -int -gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf, - int offset); - -s32 -krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *key, - int direction, - u32 seqnum, unsigned char *cksum, unsigned char *buf); - -s32 -krb5_get_seq_num(struct krb5_ctx *kctx, - unsigned char *cksum, - unsigned char *buf, int *direction, u32 *seqnum); - -int -xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From 319951eba0fc412a78a8fe3d2ee5e143cc318c14 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Jan 2023 15:40:22 -0500 Subject: SUNRPC: Remove ->xpo_secure_port() There's no need for the cost of this extra virtual function call during every RPC transaction: the RQ_SECURE bit can be set properly in ->xpo_recvfrom() instead. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index d42a75b3be10..775368802762 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -26,7 +26,6 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); - void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); void (*xpo_start_tls)(struct svc_xprt *); }; -- cgit v1.2.3 From 225a05043cd87a37455bfbff8c35d4e276519387 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 8 Feb 2023 14:56:11 +0000 Subject: filemap: Remove lock_page_killable() There are no more callers; remove this function before any more appear. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Signed-off-by: Steve French --- include/linux/pagemap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 29e1f9e76eb6..2f5b36f446cc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -977,16 +977,6 @@ static inline int folio_lock_killable(struct folio *folio) return 0; } -/* - * lock_page_killable is like lock_page but can be interrupted by fatal - * signals. It returns 0 if it locked the page and -EINTR if it was - * killed while waiting. - */ -static inline int lock_page_killable(struct page *page) -{ - return folio_lock_killable(page_folio(page)); -} - /* * folio_lock_or_retry - Lock the folio, unless this would block and the * caller indicated that it can handle a retry. -- cgit v1.2.3 From 9747b9e92418b61c2281561e0651803f1fad0159 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Feb 2023 18:39:36 +0800 Subject: mm: hugetlb: change to return bool for isolate_hugetlb() Now the isolate_hugetlb() only returns 0 or -EBUSY, and most users did not care about the negative value, thus we can convert the isolate_hugetlb() to return a boolean value to make code more clear when checking the hugetlb isolation state. Moreover converts 2 users which will consider the negative value returned by isolate_hugetlb(). No functional changes intended. [akpm@linux-foundation.org: shorten locked section, per SeongJae Park] Link: https://lkml.kernel.org/r/12a287c5bebc13df304387087bbecc6421510849.1676424378.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Kravetz Acked-by: Linus Torvalds Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index df6dd624ccfe..5f5e4177b2e0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -171,7 +171,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int isolate_hugetlb(struct folio *folio, struct list_head *list); +bool isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); @@ -413,9 +413,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline int isolate_hugetlb(struct folio *folio, struct list_head *list) +static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) { - return -EBUSY; + return false; } static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) -- cgit v1.2.3 From cd7755800eb54e8522f5e51f4e71e6494c1f1572 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Feb 2023 18:39:37 +0800 Subject: mm: change to return bool for isolate_movable_page() Now the isolate_movable_page() can only return 0 or -EBUSY, and no users will care about the negative return value, thus we can convert the isolate_movable_page() to return a boolean value to make the code more clear when checking the movable page isolation state. No functional changes intended. [akpm@linux-foundation.org: remove unneeded comment, per Matthew] Link: https://lkml.kernel.org/r/cb877f73f4fff8d309611082ec740a7065b1ade0.1676424378.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Linus Torvalds Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/migrate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c88b96b48be7..6b252f519c86 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -71,7 +71,7 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); extern struct page *alloc_migration_target(struct page *page, unsigned long private); -extern int isolate_movable_page(struct page *page, isolate_mode_t mode); +extern bool isolate_movable_page(struct page *page, isolate_mode_t mode); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -92,8 +92,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, static inline struct page *alloc_migration_target(struct page *page, unsigned long private) { return NULL; } -static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) - { return -EBUSY; } +static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) + { return false; } static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) -- cgit v1.2.3 From f9366f4c2a29d14f5992b195e268240c2deb116e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 16 Feb 2023 14:44:24 -0800 Subject: include/linux/migrate.h: remove unneeded externs As suggested by Matthew. Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/migrate.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 6b252f519c86..6241a1596a75 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -62,16 +62,16 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION -extern void putback_movable_pages(struct list_head *l); +void putback_movable_pages(struct list_head *l); int migrate_folio_extra(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode, int extra_count); int migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); -extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, - unsigned long private, enum migrate_mode mode, int reason, - unsigned int *ret_succeeded); -extern struct page *alloc_migration_target(struct page *page, unsigned long private); -extern bool isolate_movable_page(struct page *page, isolate_mode_t mode); +int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, + unsigned long private, enum migrate_mode mode, int reason, + unsigned int *ret_succeeded); +struct page *alloc_migration_target(struct page *page, unsigned long private); +bool isolate_movable_page(struct page *page, isolate_mode_t mode); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -142,8 +142,8 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -extern int migrate_misplaced_page(struct page *page, - struct vm_area_struct *vma, int node); +int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, + int node); #else static inline int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node) -- cgit v1.2.3 From d9194e009efef9613f48022f3c885191c48120f9 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Mon, 13 Feb 2023 12:57:02 -0600 Subject: of: dynamic: add lifecycle docbook info to node creation functions The existing docbook comments for the functions related to creating a devicetree node do not explain the reference count of a newly created node, how decrementing the reference count to zero will free the associated memory, and the caller's responsibility to call of_node_put() on the node. Explain what happens when the reference count is decremented to zero. Signed-off-by: Frank Rowand Link: https://lore.kernel.org/r/20230213185702.395776-8-frowand.list@gmail.com Signed-off-by: Rob Herring --- include/linux/of.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 37afd04f36eb..1cb659e682f5 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -100,6 +100,17 @@ struct of_reconfig_data { struct property *old_prop; }; +/** + * of_node_init - initialize a devicetree node + * @node: Pointer to device node that has been created by kzalloc() + * @phandle_name: Name of property holding a phandle value + * + * On return the device_node refcount is set to one. Use of_node_put() + * on @node when done to free the memory allocated for it. If the node + * is NOT a dynamic node the memory will not be freed. The decision of + * whether to free the memory will be done by node->release(), which is + * of_node_release(). + */ /* initialize a node */ extern const struct kobj_type of_node_ktype; extern const struct fwnode_operations of_fwnode_ops; -- cgit v1.2.3 From 07073eb01c5f630344bc1c3e56b0e0d94aedf919 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Feb 2023 15:01:42 +0000 Subject: splice: Add a func to do a splice from a buffered file without ITER_PIPE Provide a function to do splice read from a buffered file, pulling the folios out of the pagecache directly by calling filemap_get_pages() to do any required reading and then pasting the returned folios into the pipe. A helper function is provided to do the actual folio pasting and will handle multipage folios by splicing as many of the relevant subpages as will fit into the pipe. The code is loosely based on filemap_read() and might belong in mm/filemap.c with that as it needs to use filemap_get_pages(). Signed-off-by: David Howells Reviewed-by: Jens Axboe cc: Christoph Hellwig cc: Al Viro cc: David Hildenbrand cc: John Hubbard cc: linux-mm@kvack.org cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/fs.h | 3 +++ include/linux/pipe_fs_i.h | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..28743e38df91 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3163,6 +3163,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); /* fs/splice.c */ +ssize_t filemap_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6cb65df3e3ba..d2c3f16cf6b1 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -156,6 +156,26 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } +/** + * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring + * @pipe: The pipe to access + * @slot: The slot of interest + */ +static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, + unsigned int slot) +{ + return &pipe->bufs[slot & (pipe->ring_size - 1)]; +} + +/** + * pipe_head_buf - Return the pipe buffer at the head of the pipe ring + * @pipe: The pipe to access + */ +static inline struct pipe_buffer *pipe_head_buf(const struct pipe_inode_info *pipe) +{ + return pipe_buf(pipe, pipe->head); +} + /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to -- cgit v1.2.3 From 33b3b041543e8b3abf9a692d0f8c2ab0e07c50cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Feb 2023 10:45:40 +0000 Subject: splice: Add a func to do a splice from an O_DIRECT file without ITER_PIPE Implement a function, direct_file_splice(), that deals with this by using an ITER_BVEC iterator instead of an ITER_PIPE iterator as the former won't free its buffers when reverted. The function bulk allocates all the buffers it thinks it is going to use in advance, does the read synchronously and only then trims the buffer down. The pages we did use get pushed into the pipe. This fixes a problem with the upcoming iov_iter_extract_pages() function, whereby pages extracted from a non-user-backed iterator such as ITER_PIPE aren't pinned. __iomap_dio_rw(), however, calls iov_iter_revert() to shorten the iterator to just the bufferage it is going to use - which has the side-effect of freeing the excess pipe buffers, even though they're attached to a bio and may get written to by DMA (thanks to Hillf Danton for spotting this[1]). This then causes memory corruption that is particularly noticeable when the syzbot test[2] is run. The test boils down to: out = creat(argv[1], 0666); ftruncate(out, 0x800); lseek(out, 0x200, SEEK_SET); in = open(argv[1], O_RDONLY | O_DIRECT | O_NOFOLLOW); sendfile(out, in, NULL, 0x1dd00); run repeatedly in parallel. What I think is happening is that ftruncate() occasionally shortens the DIO read that's about to be made by sendfile's splice core by reducing i_size. This should be more efficient for DIO read by virtue of doing a bulk page allocation, but slightly less efficient by ignoring any partial page in the pipe. Reported-by: syzbot+a440341a59e3b7142895@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Jens Axboe cc: Christoph Hellwig cc: Al Viro cc: David Hildenbrand cc: John Hubbard cc: linux-mm@kvack.org cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20230207094731.1390-1-hdanton@sina.com/ [1] Link: https://lore.kernel.org/r/000000000000b0b3c005f3a09383@google.com/ [2] Signed-off-by: Steve French --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 28743e38df91..551c9403f9b3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3166,6 +3166,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +ssize_t direct_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, -- cgit v1.2.3 From f62e52d1276b6cd329fe72d36bdf912b2ce4caaf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Jan 2023 12:47:23 +0000 Subject: iov_iter: Define flags to qualify page extraction. Define flags to qualify page extraction to pass into iov_iter_*_pages*() rather than passing in FOLL_* flags. For now only a flag to allow peer-to-peer DMA is supported. Signed-off-by: David Howells Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jens Axboe cc: Al Viro cc: Logan Gunthorpe cc: linux-fsdevel@vger.kernel.org cc: linux-block@vger.kernel.org Signed-off-by: Steve French --- include/linux/uio.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 9f158238edba..eec6ed8a627a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -13,6 +13,8 @@ struct page; struct pipe_inode_info; +typedef unsigned int __bitwise iov_iter_extraction_t; + struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; @@ -252,12 +254,12 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray * loff_t start, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start, - unsigned gup_flags); + iov_iter_extraction_t extraction_flags); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start, - unsigned gup_flags); + iov_iter_extraction_t extraction_flags); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); @@ -360,4 +362,8 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } +/* Flags for iov_iter_get/extract_pages*() */ +/* Allow P2PDMA on the extracted pages */ +#define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) + #endif -- cgit v1.2.3 From 7d58fe731028128f3a7e20b9c492be48aae133ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Oct 2022 21:50:30 +0100 Subject: iov_iter: Add a function to extract a page list from an iterator Add a function, iov_iter_extract_pages(), to extract a list of pages from an iterator. The pages may be returned with a pin added or nothing, depending on the type of iterator. Add a second function, iov_iter_extract_will_pin(), to determine how the cleanup should be done. There are two cases: (1) ITER_IOVEC or ITER_UBUF iterator. Extracted pages will have pins (FOLL_PIN) obtained on them so that a concurrent fork() will forcibly copy the page so that DMA is done to/from the parent's buffer and is unavailable to/unaffected by the child process. iov_iter_extract_will_pin() will return true for this case. The caller should use something like unpin_user_page() to dispose of the page. (2) Any other sort of iterator. No refs or pins are obtained on the page, the assumption is made that the caller will manage page retention. iov_iter_extract_will_pin() will return false. The pages don't need additional disposal. Signed-off-by: David Howells Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe cc: Al Viro cc: John Hubbard cc: David Hildenbrand cc: Matthew Wilcox cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French --- include/linux/uio.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index eec6ed8a627a..514e3b7b06b8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -361,9 +361,34 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, .count = count }; } - /* Flags for iov_iter_get/extract_pages*() */ /* Allow P2PDMA on the extracted pages */ #define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) +ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, + size_t maxsize, unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0); + +/** + * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained + * @iter: The iterator + * + * Examine the iterator and indicate by returning true or false as to how, if + * at all, pages extracted from the iterator will be retained by the extraction + * function. + * + * %true indicates that the pages will have a pin placed in them that the + * caller must unpin. This is must be done for DMA/async DIO to force fork() + * to forcibly copy a page for the child (the parent must retain the original + * page). + * + * %false indicates that no measures are taken and that it's up to the caller + * to retain the pages. + */ +static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter) +{ + return user_backed_iter(iter); +} + #endif -- cgit v1.2.3 From 85dd2c8ff368b1446be9febde84afe1d7aec4261 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Oct 2022 16:16:32 +0100 Subject: netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator Add a function to extract the pages from a user-space supplied iterator (UBUF- or IOVEC-type) into a BVEC-type iterator, retaining the pages by getting a pin on them (as FOLL_PIN) as we go. This is useful in three situations: (1) A userspace thread may have a sibling that unmaps or remaps the process's VM during the operation, changing the assignment of the pages and potentially causing an error. Retaining the pages keeps some pages around, even if this occurs; futher, we find out at the point of extraction if EFAULT is going to be incurred. (2) Pages might get swapped out/discarded if not retained, so we want to retain them to avoid the reload causing a deadlock due to a DIO from/to an mmapped region on the same file. (3) The iterator may get passed to sendmsg() by the filesystem. If a fault occurs, we may get a short write to a TCP stream that's then tricky to recover from. We don't deal with other types of iterator here, leaving it to other mechanisms to retain the pages (eg. PG_locked, PG_writeback and the pipe lock). Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: linux-cachefs@redhat.com cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/netfs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 4c76ddfb6a67..b11a84f6c32b 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -17,6 +17,7 @@ #include #include #include +#include enum netfs_sreq_ref_trace; @@ -296,6 +297,9 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq, void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what); void netfs_stats_show(struct seq_file *); +ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, + struct iov_iter *new, + iov_iter_extraction_t extraction_flags); /** * netfs_inode - Get the netfs inode context from the inode -- cgit v1.2.3 From 0185846975339a5c348373aa450a977f5242366b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Oct 2022 16:19:44 +0100 Subject: netfs: Add a function to extract an iterator into a scatterlist Provide a function for filling in a scatterlist from the list of pages contained in an iterator. If the iterator is UBUF- or IOBUF-type, the pages have a pin taken on them (as FOLL_PIN). If the iterator is BVEC-, KVEC- or XARRAY-type, no pin is taken on the pages and it is left to the caller to manage their lifetime. It cannot be assumed that a ref can be validly taken, particularly in the case of a KVEC iterator. Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: linux-cachefs@redhat.com cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/netfs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b11a84f6c32b..a1f3522daa69 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -300,6 +300,10 @@ void netfs_stats_show(struct seq_file *); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); +struct sg_table; +ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t len, + struct sg_table *sgtable, unsigned int sg_max, + iov_iter_extraction_t extraction_flags); /** * netfs_inode - Get the netfs inode context from the inode -- cgit v1.2.3 From 868a6fc0ca2407622d2833adefe1c4d284766c4c Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: x86/kprobes: Fix __recover_optprobed_insn check optimizing logic Since the following commit: commit f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") modified the update timing of the KPROBE_FLAG_OPTIMIZED, a optimized_kprobe may be in the optimizing or unoptimizing state when op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. The __recover_optprobed_insn check logic is incorrect, a kprobe in the unoptimizing state may be incorrectly determined as unoptimizing. As a result, incorrect instructions are copied. The optprobe_queued_unopt function needs to be exported for invoking in arch directory. Link: https://lore.kernel.org/all/20230216034247.32348-2-yangjihong1@huawei.com/ Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Cc: stable@vger.kernel.org Signed-off-by: Yang Jihong Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index a0b92be98984..ab39285f71a6 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -378,6 +378,7 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); DEFINE_INSN_CACHE_OPS(optinsn); extern void wait_for_kprobe_optimizer(void); +bool optprobe_queued_unopt(struct optimized_kprobe *op); #else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ -- cgit v1.2.3 From f1c97a1b4ef709e3f066f82e3ba3108c3b133ae6 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range When arch_prepare_optimized_kprobe calculating jump destination address, it copies original instructions from jmp-optimized kprobe (see __recover_optprobed_insn), and calculated based on length of original instruction. arch_check_optimized_kprobe does not check KPROBE_FLAG_OPTIMATED when checking whether jmp-optimized kprobe exists. As a result, setup_detour_execution may jump to a range that has been overwritten by jump destination address, resulting in an inval opcode error. For example, assume that register two kprobes whose addresses are and in "func" function. The original code of "func" function is as follows: 0xffffffff816cb5e9 <+9>: push %r12 0xffffffff816cb5eb <+11>: xor %r12d,%r12d 0xffffffff816cb5ee <+14>: test %rdi,%rdi 0xffffffff816cb5f1 <+17>: setne %r12b 0xffffffff816cb5f5 <+21>: push %rbp 1.Register the kprobe for , assume that is kp1, corresponding optimized_kprobe is op1. After the optimization, "func" code changes to: 0xffffffff816cc079 <+9>: push %r12 0xffffffff816cc07b <+11>: jmp 0xffffffffa0210000 0xffffffff816cc080 <+16>: incl 0xf(%rcx) 0xffffffff816cc083 <+19>: xchg %eax,%ebp 0xffffffff816cc084 <+20>: (bad) 0xffffffff816cc085 <+21>: push %rbp Now op1->flags == KPROBE_FLAG_OPTIMATED; 2. Register the kprobe for , assume that is kp2, corresponding optimized_kprobe is op2. register_kprobe(kp2) register_aggr_kprobe alloc_aggr_kprobe __prepare_optimized_kprobe arch_prepare_optimized_kprobe __recover_optprobed_insn // copy original bytes from kp1->optinsn.copied_insn, // jump address = 3. disable kp1: disable_kprobe(kp1) __disable_kprobe ... if (p == orig_p || aggr_kprobe_disabled(orig_p)) { ret = disarm_kprobe(orig_p, true) // add op1 in unoptimizing_list, not unoptimized orig_p->flags |= KPROBE_FLAG_DISABLED; // op1->flags == KPROBE_FLAG_OPTIMATED | KPROBE_FLAG_DISABLED ... 4. unregister kp2 __unregister_kprobe_top ... if (!kprobe_disabled(ap) && !kprobes_all_disarmed) { optimize_kprobe(op) ... if (arch_check_optimized_kprobe(op) < 0) // because op1 has KPROBE_FLAG_DISABLED, here not return return; p->kp.flags |= KPROBE_FLAG_OPTIMIZED; // now op2 has KPROBE_FLAG_OPTIMIZED } "func" code now is: 0xffffffff816cc079 <+9>: int3 0xffffffff816cc07a <+10>: push %rsp 0xffffffff816cc07b <+11>: jmp 0xffffffffa0210000 0xffffffff816cc080 <+16>: incl 0xf(%rcx) 0xffffffff816cc083 <+19>: xchg %eax,%ebp 0xffffffff816cc084 <+20>: (bad) 0xffffffff816cc085 <+21>: push %rbp 5. if call "func", int3 handler call setup_detour_execution: if (p->flags & KPROBE_FLAG_OPTIMIZED) { ... regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; ... } The code for the destination address is 0xffffffffa021072c: push %r12 0xffffffffa021072e: xor %r12d,%r12d 0xffffffffa0210731: jmp 0xffffffff816cb5ee However, is not a valid start instruction address. As a result, an error occurs. Link: https://lore.kernel.org/all/20230216034247.32348-3-yangjihong1@huawei.com/ Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Signed-off-by: Yang Jihong Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ab39285f71a6..85a64cb95d75 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -379,6 +379,7 @@ DEFINE_INSN_CACHE_OPS(optinsn); extern void wait_for_kprobe_optimizer(void); bool optprobe_queued_unopt(struct optimized_kprobe *op); +bool kprobe_disarmed(struct kprobe *p); #else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ -- cgit v1.2.3 From db6c4dee4c104f50ed163af71c53bfdb878a8318 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 10 Jan 2023 18:56:36 +0200 Subject: PCI: Add SolidRun vendor ID Add SolidRun vendor ID to pci_ids.h The vendor ID is used in 2 different source files, the SNET vDPA driver and PCI quirks. Signed-off-by: Alvaro Karsz Acked-by: Bjorn Helgaas Message-Id: <20230110165638.123745-2-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b362d90eb9b0..6716e6371a18 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3092,6 +3092,8 @@ #define PCI_VENDOR_ID_3COM_2 0xa727 +#define PCI_VENDOR_ID_SOLIDRUN 0xd063 + #define PCI_VENDOR_ID_DIGIUM 0xd161 #define PCI_DEVICE_ID_DIGIUM_HFC4S 0xb410 -- cgit v1.2.3 From 1538a8a49ecbe6d3302cd7f347632338e56857f8 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 3 Jan 2023 11:51:05 +0100 Subject: vdpa: Add resume operation Add a new operation to allow a vDPA device to be resumed after it has been suspended. Trying to resume a device that wasn't suspended will result in a no-op. This operation is optional. If it's not implemented, the associated backend feature bit will not be exposed. And if the feature bit is not exposed, invoking this operation will return an error. Acked-by: Jason Wang Signed-off-by: Sebastien Boeuf Message-Id: <6e05c4b31b47f3e29cb2bd7ebd56c81f84b8f48a.1672742878.git.sebastien.boeuf@intel.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 6d0f5e4e82c2..96d308cbf97b 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -219,7 +219,10 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) - * @suspend: Suspend or resume the device (optional) + * @suspend: Suspend the device (optional) + * @vdev: vdpa device + * Returns integer: success (0) or error (< 0) + * @resume: Resume the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) * @get_config_size: Get the size of the configuration space includes @@ -324,6 +327,7 @@ struct vdpa_config_ops { void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); int (*suspend)(struct vdpa_device *vdev); + int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); -- cgit v1.2.3 From 2713ea3c7d930aa1ff5ef7d4a57a1e4fcc3b9985 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 Jan 2023 14:15:21 +0800 Subject: virtio_ring: per virtqueue dma device This patch introduces a per virtqueue dma device. This will be used for virtio devices whose virtqueue are backed by different underlayer devices. One example is the vDPA that where the control virtqueue could be implemented through software mediation. Some of the work are actually done before since the helper like vring_dma_device(). This work left are: - Let vring_dma_device() return the per virtqueue dma device instead of the vdev's parent. - Allow passing a dma_device when creating the virtqueue through a new helper, old vring creation helper will keep using vdev's parent. Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20230119061525.75068-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_ring.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 8b8af1a38991..8b95b69ef694 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -76,6 +76,22 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, void (*callback)(struct virtqueue *vq), const char *name); +/* + * Creates a virtqueue and allocates the descriptor ring with per + * virtqueue DMA device. + */ +struct virtqueue *vring_create_virtqueue_dma(unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool ctx, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name, + struct device *dma_dev); + /* * Creates a virtqueue with a standard layout but a caller-allocated * ring. -- cgit v1.2.3 From 25da258fa61b1a902c781406a4e24a8284fc3d8a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 Jan 2023 14:15:22 +0800 Subject: vdpa: introduce get_vq_dma_device() This patch introduces a new method to query the dma device that is use for a specific virtqueue. Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20230119061525.75068-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 96d308cbf97b..43f59ef10cc9 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -285,6 +285,11 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @get_vq_dma_dev: Get the dma device for a specific + * virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns pointer to structure device or error (NULL) * @free: Free resources that belongs to vDPA (optional) * @vdev: vdpa device */ @@ -345,6 +350,7 @@ struct vdpa_config_ops { u64 iova, u64 size); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); + struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); /* Free device resources */ void (*free)(struct vdpa_device *vdev); -- cgit v1.2.3 From 2e44ca3f1f0ba2022f949003f02cc091144e54a3 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Thu, 2 Feb 2023 19:42:48 +0900 Subject: vringh: fix a typo in comments for vringh_kiov Probably it is a simple copy error from struct vring_iov. Signed-off-by: Shunsuke Mie Message-Id: <20230202104248.2040652-1-mie@igel.co.jp> Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 212892cf9822..1991a02c6431 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -92,7 +92,7 @@ struct vringh_iov { }; /** - * struct vringh_iov - kvec mangler. + * struct vringh_kiov - kvec mangler. * * Mangles kvec in place, and restores it. * Remaining data is iov + i, of used - i elements. -- cgit v1.2.3 From f8f185e39b4de91bc5235e5be0d829bea69d9b06 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 18 Feb 2023 10:38:50 -0800 Subject: net/mlx4_en: Introduce flexible array to silence overflow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call "skb_copy_from_linear_data(skb, inl + 1, spc)" triggers a FORTIFY memcpy() warning on ppc64 platform: In function ‘fortify_memcpy_chk’, inlined from ‘skb_copy_from_linear_data’ at ./include/linux/skbuff.h:4029:2, inlined from ‘build_inline_wqe’ at drivers/net/ethernet/mellanox/mlx4/en_tx.c:722:4, inlined from ‘mlx4_en_xmit’ at drivers/net/ethernet/mellanox/mlx4/en_tx.c:1066:3: ./include/linux/fortify-string.h:513:25: error: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning] 513 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Same behaviour on x86 you can get if you use "__always_inline" instead of "inline" for skb_copy_from_linear_data() in skbuff.h The call here copies data into inlined tx destricptor, which has 104 bytes (MAX_INLINE) space for data payload. In this case "spc" is known in compile-time but the destination is used with hidden knowledge (real structure of destination is different from that the compiler can see). That cause the fortify warning because compiler can check bounds, but the real bounds are different. "spc" can't be bigger than 64 bytes (MLX4_INLINE_ALIGN), so the data can always fit into inlined tx descriptor. The fact that "inl" points into inlined tx descriptor is determined earlier in mlx4_en_xmit(). Avoid confusing the compiler with "inl + 1" constructions to get to past the inl header by introducing a flexible array "data" to the struct so that the compiler can see that we are not dealing with an array of inl structs, but rather, arbitrary data following the structure. There are no changes to the structure layout reported by pahole, and the resulting machine code is actually smaller. Reported-by: Josef Oskera Link: https://lore.kernel.org/lkml/20230217094541.2362873-1-joskera@redhat.com Fixes: f68f2ff91512 ("fortify: Detect struct member overflows in memcpy() at compile-time") Cc: Yishai Hadas Signed-off-by: Kees Cook Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20230218183842.never.954-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx4/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9db93e487496..b6b626157b03 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -446,6 +446,7 @@ enum { struct mlx4_wqe_inline_seg { __be32 byte_count; + __u8 data[]; }; enum mlx4_update_qp_attr { -- cgit v1.2.3 From 80cd22c35c9001fe72bf614d29439de41933deca Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sat, 18 Feb 2023 00:36:14 +0200 Subject: net/sched: cls_api: Support hardware miss to tc action For drivers to support partial offload of a filter's action list, add support for action miss to specify an action instance to continue from in sw. CT action in particular can't be fully offloaded, as new connections need to be handled in software. This imposes other limitations on the actions that can be offloaded together with the CT action, such as packet modifications. Assign each action on a filter's action list a unique miss_cookie which drivers can then use to fill action_miss part of the tc skb extension. On getting back this miss_cookie, find the action instance with relevant cookie and continue classifying from there. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Reviewed-by: Marcelo Ricardo Leitner Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d5602b15c714..ff7ad331fb82 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -319,12 +319,16 @@ struct nf_bridge_info { * and read by ovs to recirc_id. */ struct tc_skb_ext { - __u32 chain; + union { + u64 act_miss_cookie; + __u32 chain; + }; __u16 mru; __u16 zone; u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; + u8 act_miss:1; /* Set if act_miss_cookie is used */ }; #endif -- cgit v1.2.3 From 9e0c7efa5ea231d85c0d41693a5115b3b971717c Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Fri, 3 Feb 2023 11:40:29 +0900 Subject: block: remove more NULL checks after bdev_get_queue() bdev_get_queue() never returns NULL. Several commits [1][2] have been made before to remove such superfluous checks, but some still remained. For places where bdev_get_queue() is called solely for NULL checks, it is removed entirely. [1] commit ec9fd2a13d74 ("blk-lib: don't check bdev_get_queue() NULL check") [2] commit fea127b36c93 ("block: remove superfluous check for request queue in bdev_is_zoned()") Signed-off-by: Juhyung Park Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20230203024029.48260-1-qkrwngud825@gmail.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9637d63e6f0..89dd9b02b45b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1276,12 +1276,7 @@ static inline bool bdev_nowait(struct block_device *bdev) static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_zoned_model(q); - - return BLK_ZONED_NONE; + return blk_queue_zoned_model(bdev_get_queue(bdev)); } static inline bool bdev_is_zoned(struct block_device *bdev) -- cgit v1.2.3 From 74e19ef0ff8061ef55957c3abd71614ef0f42f47 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 21 Feb 2023 12:30:15 -0800 Subject: uaccess: Add speculation barrier to copy_from_user() The results of "access_ok()" can be mis-speculated. The result is that you can end speculatively: if (access_ok(from, size)) // Right here even for bad from/size combinations. On first glance, it would be ideal to just add a speculation barrier to "access_ok()" so that its results can never be mis-speculated. But there are lots of system calls just doing access_ok() via "copy_to_user()" and friends (example: fstat() and friends). Those are generally not problematic because they do not _consume_ data from userspace other than the pointer. They are also very quick and common system calls that should not be needlessly slowed down. "copy_from_user()" on the other hand uses a user-controller pointer and is frequently followed up with code that might affect caches. Take something like this: if (!copy_from_user(&kernelvar, uptr, size)) do_something_with(kernelvar); If userspace passes in an evil 'uptr' that *actually* points to a kernel addresses, and then do_something_with() has cache (or other) side-effects, it could allow userspace to infer kernel data values. Add a barrier to the common copy_from_user() code to prevent mis-speculated values which happen after the copy. Also add a stub for architectures that do not define barrier_nospec(). This makes the macro usable in generic code. Since the barrier is now usable in generic code, the x86 #ifdef in the BPF code can also go away. Reported-by: Jordy Zomer Suggested-by: Linus Torvalds Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Daniel Borkmann # BPF bits Signed-off-by: Linus Torvalds --- include/linux/nospec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index c1e79f72cd89..9f0af4f116d9 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -11,6 +11,10 @@ struct task_struct; +#ifndef barrier_nospec +# define barrier_nospec() do { } while (0) +#endif + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index -- cgit v1.2.3 From 6c40624930c58529185a257380442547580ed837 Mon Sep 17 00:00:00 2001 From: Souradeep Chowdhury Date: Wed, 22 Feb 2023 08:27:49 +0900 Subject: bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support The Data Capture and Compare(DCC) is a debugging tool that uses the bootconfig for configuring the register values during boot-time. Increase the max nodes supported by bootconfig to cater to the requirements of the Data Capture and Compare Driver. Link: https://lore.kernel.org/all/1674536682-18404-1-git-send-email-quic_schowdhu@quicinc.com/ Signed-off-by: Souradeep Chowdhury Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- include/linux/bootconfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 1611f9db878e..ca73940e26df 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -59,7 +59,7 @@ struct xbc_node { /* Maximum size of boot config is 32KB - 1 */ #define XBC_DATA_MAX (XBC_VALUE - 1) -#define XBC_NODE_MAX 1024 +#define XBC_NODE_MAX 8192 #define XBC_KEYLEN_MAX 256 #define XBC_DEPTH_MAX 16 -- cgit v1.2.3 From fdf6491193e411087ae77bcbc6468e3e1cff99ed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Feb 2023 17:24:00 +0100 Subject: netfilter: ctnetlink: make event listener tracking global pernet tracking doesn't work correctly because other netns might have set NETLINK_LISTEN_ALL_NSID on its event socket. In this case its expected that events originating in other net namespaces are also received. Making pernet-tracking work while also honoring NETLINK_LISTEN_ALL_NSID requires much more intrusive changes both in netlink and nfnetlink, f.e. adding a 'setsockopt' callback that lets nfnetlink know that the event socket entered (or left) ALL_NSID mode. Move to global tracking instead: if there is an event socket anywhere on the system, all net namespaces which have conntrack enabled and use autobind mode will allocate the ecache extension. netlink_has_listeners() returns false only if the given group has no subscribers in any net namespace, the 'net' argument passed to nfnetlink_has_listeners is only used to derive the protocol (nfnetlink), it has no other effect. For proper NETLINK_LISTEN_ALL_NSID-aware pernet tracking of event listeners a new netlink_has_net_listeners() is also needed. Fixes: 90d1daa45849 ("netfilter: conntrack: add nf_conntrack_events autodetect mode") Reported-by: Bryce Kahle Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d8817d381c14..bef8db9d6c08 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -488,4 +488,9 @@ extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; */ DECLARE_PER_CPU(bool, nf_skb_duplicated); +/** + * Contains bitmask of ctnetlink event subscribers, if any. + * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. + */ +extern u8 nf_ctnetlink_has_listener; #endif /*__LINUX_NETFILTER_H*/ -- cgit v1.2.3 From 8781ba7f45695af3ab8e8d1b55a31f527c9201a3 Mon Sep 17 00:00:00 2001 From: Aren Moynihan Date: Thu, 8 Dec 2022 17:02:26 -0500 Subject: mfd: axp20x: Fix order of pek rise and fall events The power button can get "stuck" if the rising edge and falling edge irq are read in the same pass. This can often be triggered when resuming from suspend if the power button is released before the kernel handles the interrupt. Swapping the order of the rise and fall events makes sure that the press event is handled first, which prevents this situation. Signed-off-by: Aren Moynihan Reviewed-by: Samuel Holland Tested-by: Samuel Holland Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221208220225.635414-1-aren@peacevolution.org --- include/linux/mfd/axp20x.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 9ab0e2fca7ea..2058194807bd 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -432,8 +432,9 @@ enum { AXP152_IRQ_PEK_SHORT, AXP152_IRQ_PEK_LONG, AXP152_IRQ_TIMER, - AXP152_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP152_IRQ_PEK_FAL_EDGE, + AXP152_IRQ_PEK_RIS_EDGE, AXP152_IRQ_GPIO3_INPUT, AXP152_IRQ_GPIO2_INPUT, AXP152_IRQ_GPIO1_INPUT, @@ -472,8 +473,9 @@ enum { AXP20X_IRQ_LOW_PWR_LVL1, AXP20X_IRQ_LOW_PWR_LVL2, AXP20X_IRQ_TIMER, - AXP20X_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP20X_IRQ_PEK_FAL_EDGE, + AXP20X_IRQ_PEK_RIS_EDGE, AXP20X_IRQ_GPIO3_INPUT, AXP20X_IRQ_GPIO2_INPUT, AXP20X_IRQ_GPIO1_INPUT, @@ -502,8 +504,9 @@ enum axp22x_irqs { AXP22X_IRQ_LOW_PWR_LVL1, AXP22X_IRQ_LOW_PWR_LVL2, AXP22X_IRQ_TIMER, - AXP22X_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP22X_IRQ_PEK_FAL_EDGE, + AXP22X_IRQ_PEK_RIS_EDGE, AXP22X_IRQ_GPIO1_INPUT, AXP22X_IRQ_GPIO0_INPUT, }; @@ -571,8 +574,9 @@ enum axp803_irqs { AXP803_IRQ_LOW_PWR_LVL1, AXP803_IRQ_LOW_PWR_LVL2, AXP803_IRQ_TIMER, - AXP803_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP803_IRQ_PEK_FAL_EDGE, + AXP803_IRQ_PEK_RIS_EDGE, AXP803_IRQ_PEK_SHORT, AXP803_IRQ_PEK_LONG, AXP803_IRQ_PEK_OVER_OFF, @@ -623,8 +627,9 @@ enum axp809_irqs { AXP809_IRQ_LOW_PWR_LVL1, AXP809_IRQ_LOW_PWR_LVL2, AXP809_IRQ_TIMER, - AXP809_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP809_IRQ_PEK_FAL_EDGE, + AXP809_IRQ_PEK_RIS_EDGE, AXP809_IRQ_PEK_SHORT, AXP809_IRQ_PEK_LONG, AXP809_IRQ_PEK_OVER_OFF, -- cgit v1.2.3 From ccc91b3ed3f641efa8e9050c587ef509b0e2be3a Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Thu, 8 Dec 2022 22:57:23 +0100 Subject: mfd: twl: Fix TWL6032 phy vbus detection TWL6032 has a few charging registers prepended before the charging registers the TWL6030 has. To be able to use common register defines declare the additional registers as additional module. At the moment this affects the access to CHARGERUSB_CTRL1 in phy-twl6030-usb. Without this patch, it is accessing the wrong register on TWL6032. The consequence is that presence of Vbus is not reported. Cc: Bin Liu Cc: Tony Lindgren Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221208215723.217557-1-andreas@kemnade.info --- include/linux/mfd/twl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index eaa233038254..6e3d99b7a0ee 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -69,6 +69,8 @@ enum twl6030_module_ids { TWL6030_MODULE_GPADC, TWL6030_MODULE_GASGAUGE, + /* A few extra registers before the registers shared with the 6030 */ + TWL6032_MODULE_CHARGE, TWL6030_MODULE_LAST, }; -- cgit v1.2.3 From 88a32c2c5e98d72765846db83c1739e7b036770a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 9 Jan 2023 14:33:22 +0100 Subject: mfd: core: Spelling s/compement/complement/ Fix a misspelling of "complement". Signed-off-by: Geert Uytterhoeven Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/aa7abd7103a0e4be954ea63de78f12e8251b2964.1673271092.git.geert+renesas@glider.be --- include/linux/mfd/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 0bc7cba798a3..14ca7b471576 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -88,7 +88,7 @@ struct mfd_cell { const char *of_compatible; /* - * Address as defined in Device Tree. Used to compement 'of_compatible' + * Address as defined in Device Tree. Used to complement 'of_compatible' * (above) when matching OF nodes with devices that have identical * compatible strings */ -- cgit v1.2.3 From 5a7fbe452ad9e4a8988d4c20d7acf148383f8106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 17 Nov 2022 08:21:51 +0100 Subject: backlight: pwm_bl: Drop support for legacy PWM probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no in-tree user left which relies on legacy probing. So drop support for it which removes another user of the deprecated pwm_request() function. Signed-off-by: Uwe Kleine-König Reviewed-by: Daniel Thompson Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221117072151.3789691-1-u.kleine-koenig@pengutronix.de --- include/linux/pwm_backlight.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 06086cb93b6f..cdd2ac366bc7 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -8,7 +8,6 @@ #include struct platform_pwm_backlight_data { - int pwm_id; unsigned int max_brightness; unsigned int dft_brightness; unsigned int lth_brightness; -- cgit v1.2.3 From ca78476e4888f1f1caac26c48ec715e546baf432 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Jan 2023 14:46:13 +0100 Subject: mfd: Remove toshiba tmio drivers Four separate mfd drivers are in the "tmio" family, and all of them were used in now-removed PXA machines (eseries, tosa, and hx4700), so the mfd drivers and all its children can be removed as well. Signed-off-by: Arnd Bergmann Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230105134622.254560-19-arnd@kernel.org --- include/linux/mfd/asic3.h | 313 ------------------------------------------- include/linux/mfd/t7l66xb.h | 29 ---- include/linux/mfd/tc6387xb.h | 19 --- include/linux/mfd/tc6393xb.h | 53 -------- include/linux/mfd/tmio.h | 5 - 5 files changed, 419 deletions(-) delete mode 100644 include/linux/mfd/asic3.h delete mode 100644 include/linux/mfd/t7l66xb.h delete mode 100644 include/linux/mfd/tc6387xb.h delete mode 100644 include/linux/mfd/tc6393xb.h (limited to 'include/linux') diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h deleted file mode 100644 index 61e686dbaa74..000000000000 --- a/include/linux/mfd/asic3.h +++ /dev/null @@ -1,313 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * include/linux/mfd/asic3.h - * - * Compaq ASIC3 headers. - * - * Copyright 2001 Compaq Computer Corporation. - * Copyright 2007-2008 OpenedHand Ltd. - */ - -#ifndef __ASIC3_H__ -#define __ASIC3_H__ - -#include - -struct led_classdev; -struct asic3_led { - const char *name; - const char *default_trigger; - struct led_classdev *cdev; -}; - -struct asic3_platform_data { - u16 *gpio_config; - unsigned int gpio_config_num; - - unsigned int irq_base; - - unsigned int gpio_base; - - unsigned int clock_rate; - - struct asic3_led *leds; -}; - -#define ASIC3_NUM_GPIO_BANKS 4 -#define ASIC3_GPIOS_PER_BANK 16 -#define ASIC3_NUM_GPIOS 64 -#define ASIC3_NR_IRQS ASIC3_NUM_GPIOS + 6 - -#define ASIC3_IRQ_LED0 64 -#define ASIC3_IRQ_LED1 65 -#define ASIC3_IRQ_LED2 66 -#define ASIC3_IRQ_SPI 67 -#define ASIC3_IRQ_SMBUS 68 -#define ASIC3_IRQ_OWM 69 - -#define ASIC3_TO_GPIO(gpio) (NR_BUILTIN_GPIO + (gpio)) - -#define ASIC3_GPIO_BANK_A 0 -#define ASIC3_GPIO_BANK_B 1 -#define ASIC3_GPIO_BANK_C 2 -#define ASIC3_GPIO_BANK_D 3 - -#define ASIC3_GPIO(bank, gpio) \ - ((ASIC3_GPIOS_PER_BANK * ASIC3_GPIO_BANK_##bank) + (gpio)) -#define ASIC3_GPIO_bit(gpio) (1 << (gpio & 0xf)) -/* All offsets below are specified with this address bus shift */ -#define ASIC3_DEFAULT_ADDR_SHIFT 2 - -#define ASIC3_OFFSET(base, reg) (ASIC3_##base##_BASE + ASIC3_##base##_##reg) -#define ASIC3_GPIO_OFFSET(base, reg) \ - (ASIC3_GPIO_##base##_BASE + ASIC3_GPIO_##reg) - -#define ASIC3_GPIO_A_BASE 0x0000 -#define ASIC3_GPIO_B_BASE 0x0100 -#define ASIC3_GPIO_C_BASE 0x0200 -#define ASIC3_GPIO_D_BASE 0x0300 - -#define ASIC3_GPIO_TO_BANK(gpio) ((gpio) >> 4) -#define ASIC3_GPIO_TO_BIT(gpio) ((gpio) - \ - (ASIC3_GPIOS_PER_BANK * ((gpio) >> 4))) -#define ASIC3_GPIO_TO_MASK(gpio) (1 << ASIC3_GPIO_TO_BIT(gpio)) -#define ASIC3_GPIO_TO_BASE(gpio) (ASIC3_GPIO_A_BASE + (((gpio) >> 4) * 0x0100)) -#define ASIC3_BANK_TO_BASE(bank) (ASIC3_GPIO_A_BASE + ((bank) * 0x100)) - -#define ASIC3_GPIO_MASK 0x00 /* R/W 0:don't mask */ -#define ASIC3_GPIO_DIRECTION 0x04 /* R/W 0:input */ -#define ASIC3_GPIO_OUT 0x08 /* R/W 0:output low */ -#define ASIC3_GPIO_TRIGGER_TYPE 0x0c /* R/W 0:level */ -#define ASIC3_GPIO_EDGE_TRIGGER 0x10 /* R/W 0:falling */ -#define ASIC3_GPIO_LEVEL_TRIGGER 0x14 /* R/W 0:low level detect */ -#define ASIC3_GPIO_SLEEP_MASK 0x18 /* R/W 0:don't mask in sleep mode */ -#define ASIC3_GPIO_SLEEP_OUT 0x1c /* R/W level 0:low in sleep mode */ -#define ASIC3_GPIO_BAT_FAULT_OUT 0x20 /* R/W level 0:low in batt_fault */ -#define ASIC3_GPIO_INT_STATUS 0x24 /* R/W 0:none, 1:detect */ -#define ASIC3_GPIO_ALT_FUNCTION 0x28 /* R/W 1:LED register control */ -#define ASIC3_GPIO_SLEEP_CONF 0x2c /* - * R/W bit 1: autosleep - * 0: disable gposlpout in normal mode, - * enable gposlpout in sleep mode. - */ -#define ASIC3_GPIO_STATUS 0x30 /* R Pin status */ - -/* - * ASIC3 GPIO config - * - * Bits 0..6 gpio number - * Bits 7..13 Alternate function - * Bit 14 Direction - * Bit 15 Initial value - * - */ -#define ASIC3_CONFIG_GPIO_PIN(config) ((config) & 0x7f) -#define ASIC3_CONFIG_GPIO_ALT(config) (((config) & (0x7f << 7)) >> 7) -#define ASIC3_CONFIG_GPIO_DIR(config) ((config & (1 << 14)) >> 14) -#define ASIC3_CONFIG_GPIO_INIT(config) ((config & (1 << 15)) >> 15) -#define ASIC3_CONFIG_GPIO(gpio, alt, dir, init) (((gpio) & 0x7f) \ - | (((alt) & 0x7f) << 7) | (((dir) & 0x1) << 14) \ - | (((init) & 0x1) << 15)) -#define ASIC3_CONFIG_GPIO_DEFAULT(gpio, dir, init) \ - ASIC3_CONFIG_GPIO((gpio), 0, (dir), (init)) -#define ASIC3_CONFIG_GPIO_DEFAULT_OUT(gpio, init) \ - ASIC3_CONFIG_GPIO((gpio), 0, 1, (init)) - -/* - * Alternate functions - */ -#define ASIC3_GPIOA11_PWM0 ASIC3_CONFIG_GPIO(11, 1, 1, 0) -#define ASIC3_GPIOA12_PWM1 ASIC3_CONFIG_GPIO(12, 1, 1, 0) -#define ASIC3_GPIOA15_CONTROL_CX ASIC3_CONFIG_GPIO(15, 1, 1, 0) -#define ASIC3_GPIOC0_LED0 ASIC3_CONFIG_GPIO(32, 1, 0, 0) -#define ASIC3_GPIOC1_LED1 ASIC3_CONFIG_GPIO(33, 1, 0, 0) -#define ASIC3_GPIOC2_LED2 ASIC3_CONFIG_GPIO(34, 1, 0, 0) -#define ASIC3_GPIOC3_SPI_RXD ASIC3_CONFIG_GPIO(35, 1, 0, 0) -#define ASIC3_GPIOC4_CF_nCD ASIC3_CONFIG_GPIO(36, 1, 0, 0) -#define ASIC3_GPIOC4_SPI_TXD ASIC3_CONFIG_GPIO(36, 1, 1, 0) -#define ASIC3_GPIOC5_SPI_CLK ASIC3_CONFIG_GPIO(37, 1, 1, 0) -#define ASIC3_GPIOC5_nCIOW ASIC3_CONFIG_GPIO(37, 1, 1, 0) -#define ASIC3_GPIOC6_nCIOR ASIC3_CONFIG_GPIO(38, 1, 1, 0) -#define ASIC3_GPIOC7_nPCE_1 ASIC3_CONFIG_GPIO(39, 1, 0, 0) -#define ASIC3_GPIOC8_nPCE_2 ASIC3_CONFIG_GPIO(40, 1, 0, 0) -#define ASIC3_GPIOC9_nPOE ASIC3_CONFIG_GPIO(41, 1, 0, 0) -#define ASIC3_GPIOC10_nPWE ASIC3_CONFIG_GPIO(42, 1, 0, 0) -#define ASIC3_GPIOC11_PSKTSEL ASIC3_CONFIG_GPIO(43, 1, 0, 0) -#define ASIC3_GPIOC12_nPREG ASIC3_CONFIG_GPIO(44, 1, 0, 0) -#define ASIC3_GPIOC13_nPWAIT ASIC3_CONFIG_GPIO(45, 1, 1, 0) -#define ASIC3_GPIOC14_nPIOIS16 ASIC3_CONFIG_GPIO(46, 1, 1, 0) -#define ASIC3_GPIOC15_nPIOR ASIC3_CONFIG_GPIO(47, 1, 0, 0) -#define ASIC3_GPIOD4_CF_nCD ASIC3_CONFIG_GPIO(52, 1, 0, 0) -#define ASIC3_GPIOD11_nCIOIS16 ASIC3_CONFIG_GPIO(59, 1, 0, 0) -#define ASIC3_GPIOD12_nCWAIT ASIC3_CONFIG_GPIO(60, 1, 0, 0) -#define ASIC3_GPIOD15_nPIOW ASIC3_CONFIG_GPIO(63, 1, 0, 0) - - -#define ASIC3_SPI_Base 0x0400 -#define ASIC3_SPI_Control 0x0000 -#define ASIC3_SPI_TxData 0x0004 -#define ASIC3_SPI_RxData 0x0008 -#define ASIC3_SPI_Int 0x000c -#define ASIC3_SPI_Status 0x0010 - -#define SPI_CONTROL_SPR(clk) ((clk) & 0x0f) /* Clock rate */ - -#define ASIC3_PWM_0_Base 0x0500 -#define ASIC3_PWM_1_Base 0x0600 -#define ASIC3_PWM_TimeBase 0x0000 -#define ASIC3_PWM_PeriodTime 0x0004 -#define ASIC3_PWM_DutyTime 0x0008 - -#define PWM_TIMEBASE_VALUE(x) ((x)&0xf) /* Low 4 bits sets time base */ -#define PWM_TIMEBASE_ENABLE (1 << 4) /* Enable clock */ - -#define ASIC3_NUM_LEDS 3 -#define ASIC3_LED_0_Base 0x0700 -#define ASIC3_LED_1_Base 0x0800 -#define ASIC3_LED_2_Base 0x0900 -#define ASIC3_LED_TimeBase 0x0000 /* R/W 7 bits */ -#define ASIC3_LED_PeriodTime 0x0004 /* R/W 12 bits */ -#define ASIC3_LED_DutyTime 0x0008 /* R/W 12 bits */ -#define ASIC3_LED_AutoStopCount 0x000c /* R/W 16 bits */ - -/* LED TimeBase bits - match ASIC2 */ -#define LED_TBS 0x0f /* Low 4 bits sets time base, max = 13 */ - /* Note: max = 5 on hx4700 */ - /* 0: maximum time base */ - /* 1: maximum time base / 2 */ - /* n: maximum time base / 2^n */ - -#define LED_EN (1 << 4) /* LED ON/OFF 0:off, 1:on */ -#define LED_AUTOSTOP (1 << 5) /* LED ON/OFF auto stop 0:disable, 1:enable */ -#define LED_ALWAYS (1 << 6) /* LED Interrupt Mask 0:No mask, 1:mask */ - -#define ASIC3_CLOCK_BASE 0x0A00 -#define ASIC3_CLOCK_CDEX 0x00 -#define ASIC3_CLOCK_SEL 0x04 - -#define CLOCK_CDEX_SOURCE (1 << 0) /* 2 bits */ -#define CLOCK_CDEX_SOURCE0 (1 << 0) -#define CLOCK_CDEX_SOURCE1 (1 << 1) -#define CLOCK_CDEX_SPI (1 << 2) -#define CLOCK_CDEX_OWM (1 << 3) -#define CLOCK_CDEX_PWM0 (1 << 4) -#define CLOCK_CDEX_PWM1 (1 << 5) -#define CLOCK_CDEX_LED0 (1 << 6) -#define CLOCK_CDEX_LED1 (1 << 7) -#define CLOCK_CDEX_LED2 (1 << 8) - -/* Clocks settings: 1 for 24.576 MHz, 0 for 12.288Mhz */ -#define CLOCK_CDEX_SD_HOST (1 << 9) /* R/W: SD host clock source */ -#define CLOCK_CDEX_SD_BUS (1 << 10) /* R/W: SD bus clock source ctrl */ -#define CLOCK_CDEX_SMBUS (1 << 11) -#define CLOCK_CDEX_CONTROL_CX (1 << 12) - -#define CLOCK_CDEX_EX0 (1 << 13) /* R/W: 32.768 kHz crystal */ -#define CLOCK_CDEX_EX1 (1 << 14) /* R/W: 24.576 MHz crystal */ - -#define CLOCK_SEL_SD_HCLK_SEL (1 << 0) /* R/W: SDIO host clock select */ -#define CLOCK_SEL_SD_BCLK_SEL (1 << 1) /* R/W: SDIO bus clock select */ - -/* R/W: INT clock source control (32.768 kHz) */ -#define CLOCK_SEL_CX (1 << 2) - - -#define ASIC3_INTR_BASE 0x0B00 - -#define ASIC3_INTR_INT_MASK 0x00 /* Interrupt mask control */ -#define ASIC3_INTR_P_INT_STAT 0x04 /* Peripheral interrupt status */ -#define ASIC3_INTR_INT_CPS 0x08 /* Interrupt timer clock pre-scale */ -#define ASIC3_INTR_INT_TBS 0x0c /* Interrupt timer set */ - -#define ASIC3_INTMASK_GINTMASK (1 << 0) /* Global INTs mask 1:enable */ -#define ASIC3_INTMASK_GINTEL (1 << 1) /* 1: rising edge, 0: hi level */ -#define ASIC3_INTMASK_MASK0 (1 << 2) -#define ASIC3_INTMASK_MASK1 (1 << 3) -#define ASIC3_INTMASK_MASK2 (1 << 4) -#define ASIC3_INTMASK_MASK3 (1 << 5) -#define ASIC3_INTMASK_MASK4 (1 << 6) -#define ASIC3_INTMASK_MASK5 (1 << 7) - -#define ASIC3_INTR_PERIPHERAL_A (1 << 0) -#define ASIC3_INTR_PERIPHERAL_B (1 << 1) -#define ASIC3_INTR_PERIPHERAL_C (1 << 2) -#define ASIC3_INTR_PERIPHERAL_D (1 << 3) -#define ASIC3_INTR_LED0 (1 << 4) -#define ASIC3_INTR_LED1 (1 << 5) -#define ASIC3_INTR_LED2 (1 << 6) -#define ASIC3_INTR_SPI (1 << 7) -#define ASIC3_INTR_SMBUS (1 << 8) -#define ASIC3_INTR_OWM (1 << 9) - -#define ASIC3_INTR_CPS(x) ((x)&0x0f) /* 4 bits, max 14 */ -#define ASIC3_INTR_CPS_SET (1 << 4) /* Time base enable */ - - -/* Basic control of the SD ASIC */ -#define ASIC3_SDHWCTRL_BASE 0x0E00 -#define ASIC3_SDHWCTRL_SDCONF 0x00 - -#define ASIC3_SDHWCTRL_SUSPEND (1 << 0) /* 1=suspend all SD operations */ -#define ASIC3_SDHWCTRL_CLKSEL (1 << 1) /* 1=SDICK, 0=HCLK */ -#define ASIC3_SDHWCTRL_PCLR (1 << 2) /* All registers of SDIO cleared */ -#define ASIC3_SDHWCTRL_LEVCD (1 << 3) /* SD card detection: 0:low */ - -/* SD card write protection: 0=high */ -#define ASIC3_SDHWCTRL_LEVWP (1 << 4) -#define ASIC3_SDHWCTRL_SDLED (1 << 5) /* SD card LED signal 0=disable */ - -/* SD card power supply ctrl 1=enable */ -#define ASIC3_SDHWCTRL_SDPWR (1 << 6) - -#define ASIC3_EXTCF_BASE 0x1100 - -#define ASIC3_EXTCF_SELECT 0x00 -#define ASIC3_EXTCF_RESET 0x04 - -#define ASIC3_EXTCF_SMOD0 (1 << 0) /* slot number of mode 0 */ -#define ASIC3_EXTCF_SMOD1 (1 << 1) /* slot number of mode 1 */ -#define ASIC3_EXTCF_SMOD2 (1 << 2) /* slot number of mode 2 */ -#define ASIC3_EXTCF_OWM_EN (1 << 4) /* enable onewire module */ -#define ASIC3_EXTCF_OWM_SMB (1 << 5) /* OWM bus selection */ -#define ASIC3_EXTCF_OWM_RESET (1 << 6) /* ?? used by OWM and CF */ -#define ASIC3_EXTCF_CF0_SLEEP_MODE (1 << 7) /* CF0 sleep state */ -#define ASIC3_EXTCF_CF1_SLEEP_MODE (1 << 8) /* CF1 sleep state */ -#define ASIC3_EXTCF_CF0_PWAIT_EN (1 << 10) /* CF0 PWAIT_n control */ -#define ASIC3_EXTCF_CF1_PWAIT_EN (1 << 11) /* CF1 PWAIT_n control */ -#define ASIC3_EXTCF_CF0_BUF_EN (1 << 12) /* CF0 buffer control */ -#define ASIC3_EXTCF_CF1_BUF_EN (1 << 13) /* CF1 buffer control */ -#define ASIC3_EXTCF_SD_MEM_ENABLE (1 << 14) -#define ASIC3_EXTCF_CF_SLEEP (1 << 15) /* CF sleep mode control */ - -/********************************************* - * The Onewire interface (DS1WM) is handled - * by the ds1wm driver. - * - *********************************************/ - -#define ASIC3_OWM_BASE 0xC00 - -/***************************************************************************** - * The SD configuration registers are at a completely different location - * in memory. They are divided into three sets of registers: - * - * SD_CONFIG Core configuration register - * SD_CTRL Control registers for SD operations - * SDIO_CTRL Control registers for SDIO operations - * - *****************************************************************************/ -#define ASIC3_SD_CONFIG_BASE 0x0400 /* Assumes 32 bit addressing */ -#define ASIC3_SD_CONFIG_SIZE 0x0200 /* Assumes 32 bit addressing */ -#define ASIC3_SD_CTRL_BASE 0x1000 -#define ASIC3_SDIO_CTRL_BASE 0x1200 - -#define ASIC3_MAP_SIZE_32BIT 0x2000 -#define ASIC3_MAP_SIZE_16BIT 0x1000 - -/* Functions needed by leds-asic3 */ - -struct asic3; -extern void asic3_write_register(struct asic3 *asic, unsigned int reg, u32 val); -extern u32 asic3_read_register(struct asic3 *asic, unsigned int reg); - -#endif /* __ASIC3_H__ */ diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h deleted file mode 100644 index ae3e7a5c5219..000000000000 --- a/include/linux/mfd/t7l66xb.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * This file contains the definitions for the T7L66XB - * - * (C) Copyright 2005 Ian Molton - */ -#ifndef MFD_T7L66XB_H -#define MFD_T7L66XB_H - -#include -#include - -struct t7l66xb_platform_data { - int (*enable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - - int irq_base; /* The base for subdevice irqs */ - - struct tmio_nand_data *nand_data; -}; - - -#define IRQ_T7L66XB_MMC (1) -#define IRQ_T7L66XB_NAND (3) - -#define T7L66XB_NR_IRQS 8 - -#endif diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h deleted file mode 100644 index aacf1dcc86b9..000000000000 --- a/include/linux/mfd/tc6387xb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * This file contains the definitions for the TC6387XB - * - * (C) Copyright 2005 Ian Molton - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - */ -#ifndef MFD_TC6387XB_H -#define MFD_TC6387XB_H - -struct tc6387xb_platform_data { - int (*enable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); -}; - -#endif diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h deleted file mode 100644 index d17807f2d0c9..000000000000 --- a/include/linux/mfd/tc6393xb.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Toshiba TC6393XB SoC support - * - * Copyright(c) 2005-2006 Chris Humbert - * Copyright(c) 2005 Dirk Opfer - * Copyright(c) 2005 Ian Molton - * Copyright(c) 2007 Dmitry Baryshkov - * - * Based on code written by Sharp/Lineo for 2.4 kernels - * Based on locomo.c - */ - -#ifndef MFD_TC6393XB_H -#define MFD_TC6393XB_H - -#include - -/* Also one should provide the CK3P6MI clock */ -struct tc6393xb_platform_data { - u16 scr_pll2cr; /* PLL2 Control */ - u16 scr_gper; /* GP Enable */ - - int (*enable)(struct platform_device *dev); - void (*disable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - - int irq_base; /* base for subdevice irqs */ - - struct tmio_nand_data *nand_data; - struct tmio_fb_data *fb_data; - - unsigned resume_restore : 1; /* make special actions - to preserve the state - on suspend/resume */ -}; - -extern int tc6393xb_lcd_mode(struct platform_device *fb, - const struct fb_videomode *mode); -extern int tc6393xb_lcd_set_power(struct platform_device *fb, bool on); - -/* - * Relative to irq_base - */ -#define IRQ_TC6393_NAND 0 -#define IRQ_TC6393_MMC 1 -#define IRQ_TC6393_OHCI 2 -#define IRQ_TC6393_FB 4 - -#define TC6393XB_NR_IRQS 8 - -#endif diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index e8bf90281ba0..eace8ea6cda0 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -84,11 +84,6 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) -int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); -int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); -void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); - struct dma_chan; /* -- cgit v1.2.3 From 8a15b4daed3d45c9162f23d393a06df2a7be4778 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 27 Jan 2023 17:58:28 +0100 Subject: mfd: ntxec: Add version number for EC in Tolino Vision The EC firmware has a different version number than anything defined until now. Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230127165828.3256170-1-andreas@kemnade.info --- include/linux/mfd/ntxec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h index cc6f07bfa2b3..e5880c346da9 100644 --- a/include/linux/mfd/ntxec.h +++ b/include/linux/mfd/ntxec.h @@ -34,5 +34,5 @@ static inline u16 ntxec_reg8(u8 value) /* Known firmware versions */ #define NTXEC_VERSION_KOBO_AURA 0xd726 /* found in Kobo Aura */ #define NTXEC_VERSION_TOLINO_SHINE2 0xf110 /* found in Tolino Shine 2 HD */ - +#define NTXEC_VERSION_TOLINO_VISION 0xe135 /* found in Tolino Vision, contains RTC, ADC, PWM, home pad */ #endif -- cgit v1.2.3 From c1855dd0a62b7ead360eb9231fb65c2108efaf47 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 22 Feb 2023 06:31:10 -0800 Subject: clk: qcom: Revert sync_state based clk_disable_unused Revert the postponement of clk_disable_unused() for clock providers that implement sync_state, and the change to drivers implementing this, until agreement on the implementation has been reached. This reverts: 29e31415e14e ("clk: qcom: Remove need for clk_ignore_unused on sc8280xp") 99c0f7d35c4b ("clk: qcom: sdm845: Use generic clk_sync_state_disable_unused callback") 26b36df75166 ("clk: Add generic sync_state callback for disabling unused clocks") Requested-by: Stephen Boyd Signed-off-by: Bjorn Andersson --- include/linux/clk-provider.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index cf1adfeaf257..842e72a5348f 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -720,7 +720,6 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); -void clk_sync_state_disable_unused(struct device *dev); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock -- cgit v1.2.3 From 8d664282a03fec09682f10252d3c785c2513691d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Feb 2023 08:27:23 -0700 Subject: io_uring: rename 'in_idle' to 'in_cancel' This better describes what it does - it's incremented when the task is currently undergoing a cancelation operation, due to exiting or exec'ing. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 0efe4d784358..00689c12f6ab 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -58,7 +58,7 @@ struct io_uring_task { struct xarray xa; struct wait_queue_head wait; - atomic_t in_idle; + atomic_t in_cancel; atomic_t inflight_tracked; struct percpu_counter inflight; -- cgit v1.2.3 From 16f8a08643b6d63d2e8252ddaa9e17e2408a2531 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:14:05 +0300 Subject: dmaengine: dw-edma: Add mem-mapped LL-entries support Currently the DW eDMA driver only supports the linked lists memory allocated locally with respect to the remote eDMA engine setup. It means the linked lists will be accessible by the CPU via the MMIO space only. If eDMA is embedded into the DW PCIe Root Ports or local Endpoints (which support will be added in subsequent commits) the linked lists are supposed to be allocated in the CPU memory. In that case the LL-entries can be directly accessed, while the former case implies using the MMIO accessors for that. In order to have both cases supported by the driver, the dw_edma_region descriptor should be fixed to contain the MMIO-backed and just memory-based virtual addresses. The linked lists initialization procedure will use one of them depending on the eDMA device nature. If the eDMA engine is embedded into the local DW PCIe Root Port/Endpoint controllers, the list entries will be directly accessed by referencing the corresponding structure fields. Otherwise the MMIO accessors usage will be preserved. Link: https://lore.kernel.org/r/20230113171409.30470-24-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Vinod Koul --- include/linux/dma/edma.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 96bfd0173f3a..3b80040b95cc 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -19,7 +19,10 @@ struct dw_edma; struct dw_edma_region { u64 paddr; - void __iomem *vaddr; + union { + void *mem; + void __iomem *io; + } vaddr; size_t sz; }; -- cgit v1.2.3 From 3bc0f149405e07c7e59985a24ce96db83973f8d7 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:14:06 +0300 Subject: dmaengine: dw-edma: Prepare dw_edma_probe() for builtin callers When CONFIG_DW_EDMA=m, dw_edma_probe() is built as a module. Previously edma.h declared it as extern, but the implementation isn't available for builtin callers. A subsequent commit will add calls from dw_pcie_host_init() and dw_pcie_ep_init(), which can only be built-in. Make it safe for such builtin callers to call dw_edma_probe() by using IS_REACHABLE() to define a stub when CONFIG_DW_EDMA=m. When CONFIG_DW_EDMA=m, these builtin callers will fail to detect and register eDMA devices, so eDMA won't be usable even if the dw-edma module is loaded. [bhelgaas: split to separate patch, commit log] Link: https://lore.kernel.org/r/20230113171409.30470-25-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Vinod Koul --- include/linux/dma/edma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 3b80040b95cc..d2638d9259dc 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -101,7 +101,7 @@ struct dw_edma_chip { }; /* Export to the platform drivers */ -#if IS_ENABLED(CONFIG_DW_EDMA) +#if IS_REACHABLE(CONFIG_DW_EDMA) int dw_edma_probe(struct dw_edma_chip *chip); int dw_edma_remove(struct dw_edma_chip *chip); #else -- cgit v1.2.3 From b1a37ed00d7908a991c1d0f18a8cba3c2aa99bdc Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 23 Jan 2023 12:39:11 +0000 Subject: HID: core: Provide new max_buffer_size attribute to over-ride the default Presently, when a report is processed, its proposed size, provided by the user of the API (as Report Size * Report Count) is compared against the subsystem default HID_MAX_BUFFER_SIZE (16k). However, some low-level HID drivers allocate a reduced amount of memory to their buffers (e.g. UHID only allocates UHID_DATA_MAX (4k) buffers), rending this check inadequate in some cases. In these circumstances, if the received report ends up being smaller than the proposed report size, the remainder of the buffer is zeroed. That is, the space between sizeof(csize) (size of the current report) and the rsize (size proposed i.e. Report Size * Report Count), which can be handled up to HID_MAX_BUFFER_SIZE (16k). Meaning that memset() shoots straight past the end of the buffer boundary and starts zeroing out in-use values, often resulting in calamity. This patch introduces a new variable into 'struct hid_ll_driver' where individual low-level drivers can over-ride the default maximum value of HID_MAX_BUFFER_SIZE (16k) with something more sympathetic to the interface. Signed-off-by: Lee Jones Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index eaf8ab177303..1ea8c7a3570b 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -834,6 +834,7 @@ struct hid_driver { * @output_report: send output report to device * @idle: send idle request to device * @may_wakeup: return if device may act as a wakeup source during system-suspend + * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE) */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); @@ -859,6 +860,8 @@ struct hid_ll_driver { int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); bool (*may_wakeup)(struct hid_device *hdev); + + unsigned int max_buffer_size; }; extern bool hid_is_usb(const struct hid_device *hdev); -- cgit v1.2.3 From b6478b8c93304fa0483e4657779b44634a1711c7 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 22 Feb 2023 06:50:41 +0100 Subject: net: phy: c45: add genphy_c45_an_config_eee_aneg() function Add new genphy_c45_an_config_eee_aneg() function and replace some of genphy_c45_write_eee_adv() calls. This will be needed by the next patch. Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 727bff531a14..19d83e112beb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1765,6 +1765,7 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); +int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 3eeca4e199cee2066c65b872391cecee5cbbbb81 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 22 Feb 2023 06:50:42 +0100 Subject: net: phy: do not force EEE support With following patches: commit 9b01c885be36 ("net: phy: c22: migrate to genphy_c45_write_eee_adv()") commit 5827b168125d ("net: phy: c45: migrate to genphy_c45_write_eee_adv()") we set the advertisement to potentially supported values. This behavior may introduce new regressions on systems where EEE was disabled by default (BIOS or boot loader configuration or by other ways.) At same time, with this patches, we would overwrite EEE advertisement configuration made over ethtool. To avoid this issues, we need to cache initial and ethtool advertisement configuration and store it for later use. Fixes: 9b01c885be36 ("net: phy: c22: migrate to genphy_c45_write_eee_adv()") Fixes: 5827b168125d ("net: phy: c45: migrate to genphy_c45_write_eee_adv()") Fixes: 022c3f87f88e ("net: phy: add genphy_c45_ethtool_get/set_eee() support") Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 19d83e112beb..36bf0bbc8efa 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -575,6 +575,8 @@ struct macsec_ops; * @advertising: Currently advertised linkmodes * @adv_old: Saved advertised while power saving for WoL * @supported_eee: supported PHY EEE linkmodes + * @advertising_eee: Currently advertised EEE linkmodes + * @eee_enabled: Flag indicating whether the EEE feature is enabled * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -681,6 +683,8 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); /* used for eee validation */ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); + bool eee_enabled; /* Host supported PHY interface types. Should be ignored if empty. */ DECLARE_PHY_INTERFACE_MASK(host_interfaces); @@ -1766,6 +1770,7 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); +int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From d5f7638eb5fed0eb12e45a127764c4111b11c50e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Oct 2022 05:34:14 -0700 Subject: Input: matrix_keypad - replace header inclusions by forward declarations When the data structure is only referred by pointer, compiler may not need to see the contents of the data type. Thus, we may replace header inclusions by respective forward declarations. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220923184632.2157-2-andriy.shevchenko@linux.intel.com Signed-off-by: Dmitry Torokhov --- include/linux/input/matrix_keypad.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 9476768c3b90..b8d8d69eba29 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -3,8 +3,9 @@ #define _MATRIX_KEYPAD_H #include -#include -#include + +struct device; +struct input_dev; #define MATRIX_MAX_ROWS 32 #define MATRIX_MAX_COLS 32 -- cgit v1.2.3 From 0322ef49c1ac6f0e2ef37b146c0bf8440873072c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 24 Feb 2023 17:52:33 +0200 Subject: net: dsa: seville: ignore mscc-miim read errors from Lynx PCS During the refactoring in the commit below, vsc9953_mdio_read() was replaced with mscc_miim_read(), which has one extra step: it checks for the MSCC_MIIM_DATA_ERROR bits before returning the result. On T1040RDB, there are 8 QSGMII PCSes belonging to the switch, and they are organized in 2 groups. First group responds to MDIO addresses 4-7 because QSGMIIACR1[MDEV_PORT] is 1, and the second group responds to MDIO addresses 8-11 because QSGMIIBCR1[MDEV_PORT] is 2. I have double checked that these values are correctly set in the SERDES, as well as PCCR1[QSGMA_CFG] and PCCR1[QSGMB_CFG] are both 0b01. mscc_miim_read: phyad 8 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 8 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 8 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 8 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 9 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 9 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 9 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 9 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 10 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 10 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 10 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 10 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 11 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 11 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 11 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 11 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 4 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 4 reg 0x5 MIIM_DATA 0x3da01, ERROR mscc_miim_read: phyad 5 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 5 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 5 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 5 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 6 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 6 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 6 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 6 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 7 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 7 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 7 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 7 reg 0x5 MIIM_DATA 0x35801, ERROR As can be seen, the data in MIIM_DATA is still valid despite having the MSCC_MIIM_DATA_ERROR bits set. The driver as introduced in commit 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch") was ignoring these bits, perhaps deliberately (although unbeknownst to me). This is an old IP and the hardware team cannot seem to be able to help me track down a plausible reason for these failures. I'll keep investigating, but in the meantime, this is a direct regression which must be restored to a working state. The only thing I can do is keep ignoring the errors as before. Fixes: b99658452355 ("net: dsa: ocelot: felix: utilize shared mscc-miim driver for indirect MDIO access") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio/mdio-mscc-miim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdio/mdio-mscc-miim.h b/include/linux/mdio/mdio-mscc-miim.h index 5b4ed2c3cbb9..1ce699740af6 100644 --- a/include/linux/mdio/mdio-mscc-miim.h +++ b/include/linux/mdio/mdio-mscc-miim.h @@ -14,6 +14,6 @@ int mscc_miim_setup(struct device *device, struct mii_bus **bus, const char *name, struct regmap *mii_regmap, - int status_offset); + int status_offset, bool ignore_read_errors); #endif -- cgit v1.2.3 From 250870824c1cf199b032b1ef889c8e8d69d9123a Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Tue, 24 Jan 2023 22:48:16 +0100 Subject: sh: intc: Avoid spurious sizeof-pointer-div warning GCC warns about the pattern sizeof(void*)/sizeof(void), as it looks like the abuse of a pattern to calculate the array size. This pattern appears in the unevaluated part of the ternary operator in _INTC_ARRAY if the parameter is NULL. The replacement uses an alternate approach to return 0 in case of NULL which does not generate the pattern sizeof(void*)/sizeof(void), but still emits the warning if _INTC_ARRAY is called with a nonarray parameter. This patch is required for successful compilation with -Werror enabled. The idea to use _Generic for type distinction is taken from Comment #7 in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108483 by Jakub Jelinek Signed-off-by: Michael Karcher Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/619fa552-c988-35e5-b1d7-fe256c46a272@mkarcher.dialup.fu-berlin.de Signed-off-by: John Paul Adrian Glaubitz --- include/linux/sh_intc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index c255273b0281..37ad81058d6a 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -97,7 +97,10 @@ struct intc_hw_desc { unsigned int nr_subgroups; }; -#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a) +#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a, \ + typeof(NULL): 0, \ + default: sizeof(a))) +#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a) #define INTC_HW_DESC(vectors, groups, mask_regs, \ prio_regs, sense_regs, ack_regs) \ -- cgit v1.2.3 From a4eecbae092759537748360299de03e434c9a956 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 25 Jan 2023 16:55:56 +0100 Subject: capability: add cap_isidentical Signed-off-by: Mateusz Guzik Reviewed-by: Serge Hallyn Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/capability.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 03c2a613ad40..d3c6c2d1ff45 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -157,6 +157,16 @@ static inline bool cap_isclear(const kernel_cap_t a) return true; } +static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) +{ + unsigned __capi; + CAP_FOR_EACH_U32(__capi) { + if (a.cap[__capi] != b.cap[__capi]) + return false; + } + return true; +} + /* * Check if "a" is a subset of "set". * return true if ALL of the capabilities in "a" are also in "set" -- cgit v1.2.3 From 6da6b1d4a7df8c35770186b53ef65d388398e139 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 21 Feb 2023 17:59:05 +0900 Subject: mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON After a memory error happens on a clean folio, a process unexpectedly receives SIGBUS when it accesses the error page. This SIGBUS killing is pointless and simply degrades the level of RAS of the system, because the clean folio can be dropped without any data lost on memory error handling as we do for a clean pagecache. When memory_failure() is called on a clean folio, try_to_unmap() is called twice (one from split_huge_page() and one from hwpoison_user_mappings()). The root cause of the issue is that pte conversion to hwpoisoned entry is now done in the first call of try_to_unmap() because PageHWPoison is already set at this point, while it's actually expected to be done in the second call. This behavior disturbs the error handling operation like removing pagecache, which results in the malfunction described above. So convert TTU_IGNORE_HWPOISON into TTU_HWPOISON and set TTU_HWPOISON only when we really intend to convert pte to hwpoison entry. This can prevent other callers of try_to_unmap() from accidentally converting to hwpoison entries. Link: https://lkml.kernel.org/r/20230221085905.1465385-1-naoya.horiguchi@linux.dev Fixes: a42634a6c07d ("readahead: Use a folio in read_pages()") Signed-off-by: Naoya Horiguchi Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a4570da03e58..b87d01660412 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -94,7 +94,7 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ - TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_HWPOISON = 0x20, /* do convert pte to hwpoison entry */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ -- cgit v1.2.3 From f122a08b197d076ccf136c73fae0146875812a88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 28 Feb 2023 11:39:09 -0800 Subject: capability: just use a 'u64' instead of a 'u32[2]' array Back in 2008 we extended the capability bits from 32 to 64, and we did it by extending the single 32-bit capability word from one word to an array of two words. It was then obfuscated by hiding the "2" behind two macro expansions, with the reasoning being that maybe it gets extended further some day. That reasoning may have been valid at the time, but the last thing we want to do is to extend the capability set any more. And the array of values not only causes source code oddities (with loops to deal with it), but also results in worse code generation. It's a lose-lose situation. So just change the 'u32[2]' into a 'u64' and be done with it. We still have to deal with the fact that the user space interface is designed around an array of these 32-bit values, but that was the case before too, since the array layouts were different (ie user space doesn't use an array of 32-bit values for individual capability masks, but an array of 32-bit slices of multiple masks). So that marshalling of data is actually simplified too, even if it does remain somewhat obscure and odd. This was all triggered by my reaction to the new "cap_isidentical()" introduced recently. By just using a saner data structure, it went from unsigned __capi; CAP_FOR_EACH_U32(__capi) { if (a.cap[__capi] != b.cap[__capi]) return false; } return true; to just being return a.val == b.val; instead. Which is rather more obvious both to humans and to compilers. Cc: Mateusz Guzik Cc: Casey Schaufler Cc: Serge Hallyn Cc: Al Viro Cc: Paul Moore Signed-off-by: Linus Torvalds --- include/linux/capability.h | 131 ++++++++++----------------------------------- 1 file changed, 29 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index d3c6c2d1ff45..0c356a517991 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -15,28 +15,25 @@ #include #include +#include #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 -#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 extern int file_caps_enabled; -typedef struct kernel_cap_struct { - __u32 cap[_KERNEL_CAPABILITY_U32S]; -} kernel_cap_t; +typedef struct { u64 val; } kernel_cap_t; /* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; + kuid_t rootid; kernel_cap_t permitted; kernel_cap_t inheritable; - kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) - struct file; struct inode; struct dentry; @@ -44,16 +41,6 @@ struct task_struct; struct user_namespace; struct mnt_idmap; -extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_init_eff_set; - -/* - * Internal kernel functions only - */ - -#define CAP_FOR_EACH_U32(__capi) \ - for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) - /* * CAP_FS_MASK and CAP_NFSD_MASKS: * @@ -67,104 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set; * 2. The security.* and trusted.* xattrs are fs-related MAC permissions */ -# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ - | CAP_TO_MASK(CAP_MKNOD) \ - | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ - | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ - | CAP_TO_MASK(CAP_FOWNER) \ - | CAP_TO_MASK(CAP_FSETID)) - -# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) - -#if _KERNEL_CAPABILITY_U32S != 2 -# error Fix up hand-coded capability macro initializers -#else /* HAND-CODED capability initializers */ +# define CAP_FS_MASK (BIT_ULL(CAP_CHOWN) \ + | BIT_ULL(CAP_MKNOD) \ + | BIT_ULL(CAP_DAC_OVERRIDE) \ + | BIT_ULL(CAP_DAC_READ_SEARCH) \ + | BIT_ULL(CAP_FOWNER) \ + | BIT_ULL(CAP_FSETID) \ + | BIT_ULL(CAP_MAC_OVERRIDE)) +#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1) -#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) +# define CAP_EMPTY_SET ((kernel_cap_t) { 0 }) +# define CAP_FULL_SET ((kernel_cap_t) { CAP_VALID_MASK }) +# define CAP_FS_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) }) +# define CAP_NFSD_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) }) -# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) -# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ - CAP_FS_MASK_B1 } }) -# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_SYS_RESOURCE), \ - CAP_FS_MASK_B1 } }) +# define cap_clear(c) do { (c).val = 0; } while (0) -#endif /* _KERNEL_CAPABILITY_U32S != 2 */ - -# define cap_clear(c) do { (c) = __cap_empty_set; } while (0) - -#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) - -#define CAP_BOP_ALL(c, a, b, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ - } \ -} while (0) - -#define CAP_UOP_ALL(c, a, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = OP a.cap[__capi]; \ - } \ -} while (0) +#define cap_raise(c, flag) ((c).val |= BIT_ULL(flag)) +#define cap_lower(c, flag) ((c).val &= ~BIT_ULL(flag)) +#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0) static inline kernel_cap_t cap_combine(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, |); - return dest; + return (kernel_cap_t) { a.val | b.val }; } static inline kernel_cap_t cap_intersect(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, &); - return dest; + return (kernel_cap_t) { a.val & b.val }; } static inline kernel_cap_t cap_drop(const kernel_cap_t a, const kernel_cap_t drop) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, drop, &~); - return dest; -} - -static inline kernel_cap_t cap_invert(const kernel_cap_t c) -{ - kernel_cap_t dest; - CAP_UOP_ALL(dest, c, ~); - return dest; + return (kernel_cap_t) { a.val &~ drop.val }; } static inline bool cap_isclear(const kernel_cap_t a) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != 0) - return false; - } - return true; + return !a.val; } static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != b.cap[__capi]) - return false; - } - return true; + return a.val == b.val; } /* @@ -176,39 +111,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) */ static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { - kernel_cap_t dest; - dest = cap_drop(a, set); - return cap_isclear(dest); + return !(a.val & ~set.val); } /* Used to decide between falling back on the old suser() or fsuser(). */ static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_FS_SET); } static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_fs_set)); + return cap_combine(a, cap_intersect(permitted, CAP_FS_SET)); } static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_NFSD_SET); } static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_nfsd_set)); + return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET)); } #ifdef CONFIG_MULTIUSER -- cgit v1.2.3 From 0fb7fb713461e44b12e72c292bf90ee300f40710 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2023 22:07:48 +0100 Subject: genirq/msi, platform-msi: Ensure that MSI descriptors are unreferenced Miquel reported a warning in the MSI core which is triggered when interrupts are freed via platform_msi_device_domain_free(). This code got reworked to use core functions for freeing the MSI descriptors, but nothing took care to clear the msi_desc->irq entry, which then triggers the warning in msi_free_msi_desc() which uses desc->irq to validate that the descriptor has been torn down. The same issue exists in msi_domain_populate_irqs(). Up to the point that msi_free_msi_descs() grew a warning for this case, this went un-noticed. Provide the counterpart of msi_domain_populate_irqs() and invoke it in platform_msi_device_domain_free() before freeing the interrupts and MSI descriptors and also in the error path of msi_domain_populate_irqs(). Fixes: 2f2940d16823 ("genirq/msi: Remove filter from msi_free_descs_free_range()") Reported-by: Miquel Raynal Signed-off-by: Thomas Gleixner Tested-by: Miquel Raynal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87mt4wkwnv.ffs@tglx --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index a112b913fff9..15dd71817996 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *args); int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, int virq, int nvec, msi_alloc_info_t *args); +void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec); + struct irq_domain * __platform_msi_create_device_domain(struct device *dev, unsigned int nvec, -- cgit v1.2.3 From 95207db8166ab95c42a03fdc5e3abd212c9987dc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Oct 2022 03:23:49 +0900 Subject: Remove Intel compiler support include/linux/compiler-intel.h had no update in the past 3 years. We often forget about the third C compiler to build the kernel. For example, commit a0a12c3ed057 ("asm goto: eradicate CC_HAS_ASM_GOTO") only mentioned GCC and Clang. init/Kconfig defines CC_IS_GCC and CC_IS_CLANG but not CC_IS_ICC, and nobody has reported any issue. I guess the Intel Compiler support is broken, and nobody is caring about it. Harald Arnesen pointed out ICC (classic Intel C/C++ compiler) is deprecated: $ icc -v icc: remark #10441: The Intel(R) C++ Compiler Classic (ICC) is deprecated and will be removed from product release in the second half of 2023. The Intel(R) oneAPI DPC++/C++ Compiler (ICX) is the recommended compiler moving forward. Please transition to use this compiler. Use '-diag-disable=10441' to disable this message. icc version 2021.7.0 (gcc version 12.1.0 compatibility) Arnd Bergmann provided a link to the article, "Intel C/C++ compilers complete adoption of LLVM". lib/zstd/common/compiler.h and lib/zstd/compress/zstd_fast.c were kept untouched for better sync with https://github.com/facebook/zstd Link: https://www.intel.com/content/www/us/en/developer/articles/technical/adoption-of-llvm-complete-icx.html Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/compiler-intel.h | 34 ---------------------------------- include/linux/compiler_attributes.h | 14 +------------- include/linux/compiler_types.h | 2 -- 3 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 include/linux/compiler-intel.h (limited to 'include/linux') diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h deleted file mode 100644 index b17f3cd18334..000000000000 --- a/include/linux/compiler-intel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." -#endif - -#ifdef __ECC - -/* Compiler specific definitions for Intel ECC compiler */ - -#include - -/* Intel ECC compiler doesn't support gcc specific asm stmts. - * It uses intrinsics to do the equivalent things. - */ - -#define barrier() __memory_barrier() -#define barrier_data(ptr) barrier() - -#define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __ptr = (unsigned long) (ptr); \ - (typeof(ptr)) (__ptr + (off)); }) - -/* This should act as an optimization barrier on var. - * Given that this compiler does not have inline assembly, a compiler barrier - * is the best we can do. - */ -#define OPTIMIZER_HIDE_VAR(var) barrier() - -#endif - -/* icc has this, but it's called _bswap16 */ -#define __HAVE_BUILTIN_BSWAP16__ -#define __builtin_bswap16 _bswap16 diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 4a3bd114a24f..e659cb6fded3 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -64,16 +64,10 @@ * compiler should see some alignment anyway, when the return value is * massaged by 'flags = ptr & 3; ptr &= ~3;'). * - * Optional: not supported by icc - * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned */ -#if __has_attribute(__assume_aligned__) -# define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) -#else -# define __assume_aligned(a, ...) -#endif +#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) /* * Note the long name. @@ -85,7 +79,6 @@ /* * Optional: only supported since gcc >= 9 * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute */ @@ -98,7 +91,6 @@ /* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 - * Optional: not supported by icc * * clang: https://clang.llvm.org/docs/AttributeReference.html#diagnose_as_builtin */ @@ -122,7 +114,6 @@ /* * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute */ @@ -236,7 +227,6 @@ /* * Optional: only supported since gcc >= 8 * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute */ @@ -267,7 +257,6 @@ /* * Optional: not supported by gcc. - * Optional: not supported by icc. * * clang: https://clang.llvm.org/docs/AttributeReference.html#overloadable */ @@ -287,7 +276,6 @@ * Note: the "type" argument should match any __builtin_object_size(p, type) usage. * * Optional: not supported by gcc. - * Optional: not supported by icc. * * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 690c7c826fbf..547ea1ff806e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -120,8 +120,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Compiler specific macros. */ #ifdef __clang__ #include -#elif defined(__INTEL_COMPILER) -#include #elif defined(__GNUC__) /* The above compilers also define __GNUC__, so order is important here. */ #include -- cgit v1.2.3 From 596ff4a09b8981790e15572e8e7bc904df5835e7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Mar 2023 13:35:43 -0800 Subject: cpumask: re-introduce constant-sized cpumask optimizations Commit aa47a7c215e7 ("lib/cpumask: deprecate nr_cpumask_bits") resulted in the cpumask operations potentially becoming hugely less efficient, because suddenly the cpumask was always considered to be variable-sized. The optimization was then later added back in a limited form by commit 6f9c07be9d02 ("lib/cpumask: add FORCE_NR_CPUS config option"), but that FORCE_NR_CPUS option is not useful in a generic kernel and more of a special case for embedded situations with fixed hardware. Instead, just re-introduce the optimization, with some changes. Instead of depending on CPUMASK_OFFSTACK being false, and then always using the full constant cpumask width, this introduces three different cpumask "sizes": - the exact size (nr_cpumask_bits) remains identical to nr_cpu_ids. This is used for situations where we should use the exact size. - the "small" size (small_cpumask_bits) is the NR_CPUS constant if it fits in a single word and the bitmap operations thus end up able to trigger the "small_const_nbits()" optimizations. This is used for the operations that have optimized single-word cases that get inlined, notably the bit find and scanning functions. - the "large" size (large_cpumask_bits) is the NR_CPUS constant if it is an sufficiently small constant that makes simple "copy" and "clear" operations more efficient. This is arbitrarily set at four words or less. As a an example of this situation, without this fixed size optimization, cpumask_clear() will generate code like movl nr_cpu_ids(%rip), %edx addq $63, %rdx shrq $3, %rdx andl $-8, %edx callq memset@PLT on x86-64, because it would calculate the "exact" number of longwords that need to be cleared. In contrast, with this patch, using a MAX_CPU of 64 (which is quite a reasonable value to use), the above becomes a single movq $0,cpumask instruction instead, because instead of caring to figure out exactly how many CPU's the system has, it just knows that the cpumask will be a single word and can just clear it all. Note that this does end up tightening the rules a bit from the original version in another way: operations that set bits in the cpumask are now limited to the actual nr_cpu_ids limit, whereas we used to do the nr_cpumask_bits thing almost everywhere in the cpumask code. But if you just clear bits, or scan for bits, we can use the simpler compile-time constants. In the process, remove 'cpumask_complement()' and 'for_each_cpu_not()' which were not useful, and which fundamentally have to be limited to 'nr_cpu_ids'. Better remove them now than have somebody introduce use of them later. Of course, on x86-64 with MAXSMP there is no sane small compile-time constant for the cpumask sizes, and we end up using the actual CPU bits, and will generate the above kind of horrors regardless. Please don't use MAXSMP unless you really expect to have machines with thousands of cores. Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 125 +++++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 10c92bd9b807..8fbe76607965 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -50,8 +50,41 @@ static inline void set_nr_cpu_ids(unsigned int nr) #endif } -/* Deprecated. Always use nr_cpu_ids. */ -#define nr_cpumask_bits nr_cpu_ids +/* + * We have several different "preferred sizes" for the cpumask + * operations, depending on operation. + * + * For example, the bitmap scanning and operating operations have + * optimized routines that work for the single-word case, but only when + * the size is constant. So if NR_CPUS fits in one single word, we are + * better off using that small constant, in order to trigger the + * optimized bit finding. That is 'small_cpumask_size'. + * + * The clearing and copying operations will similarly perform better + * with a constant size, but we limit that size arbitrarily to four + * words. We call this 'large_cpumask_size'. + * + * Finally, some operations just want the exact limit, either because + * they set bits or just don't have any faster fixed-sized versions. We + * call this just 'nr_cpumask_size'. + * + * Note that these optional constants are always guaranteed to be at + * least as big as 'nr_cpu_ids' itself is, and all our cpumask + * allocations are at least that size (see cpumask_size()). The + * optimization comes from being able to potentially use a compile-time + * constant instead of a run-time generated exact number of CPUs. + */ +#if NR_CPUS <= BITS_PER_LONG + #define small_cpumask_bits ((unsigned int)NR_CPUS) + #define large_cpumask_bits ((unsigned int)NR_CPUS) +#elif NR_CPUS <= 4*BITS_PER_LONG + #define small_cpumask_bits nr_cpu_ids + #define large_cpumask_bits ((unsigned int)NR_CPUS) +#else + #define small_cpumask_bits nr_cpu_ids + #define large_cpumask_bits nr_cpu_ids +#endif +#define nr_cpumask_bits nr_cpu_ids /* * The following particular system cpumasks and operations manage @@ -126,7 +159,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { - return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -137,7 +170,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { - return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -150,7 +183,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { - return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); + return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** @@ -161,7 +194,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { - return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -177,7 +210,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); + return find_next_bit(cpumask_bits(srcp), small_cpumask_bits, n + 1); } /** @@ -192,7 +225,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); - return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); + return find_next_zero_bit(cpumask_bits(srcp), small_cpumask_bits, n+1); } #if NR_CPUS == 1 @@ -235,7 +268,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, if (n != -1) cpumask_check(n); return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits, n + 1); + small_cpumask_bits, n + 1); } /** @@ -246,17 +279,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ - for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) - -/** - * for_each_cpu_not - iterate over every cpu in a complemented mask - * @cpu: the (optionally unsigned) integer iterator - * @mask: the cpumask pointer - * - * After the loop, cpu is >= nr_cpu_ids. - */ -#define for_each_cpu_not(cpu, mask) \ - for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) + for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 static inline @@ -290,7 +313,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_wrap(cpu, mask, start) \ - for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start) + for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start) /** * for_each_cpu_and - iterate over every cpu in both masks @@ -307,7 +330,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ - for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding @@ -325,7 +348,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_andnot(cpu, mask1, mask2) \ - for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * cpumask_any_but - return a "random" in a cpumask, but not this one. @@ -356,7 +379,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { - return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu)); + return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } /** @@ -372,7 +395,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } /** @@ -388,7 +411,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } /** @@ -408,7 +431,7 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp return find_nth_and_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), cpumask_bits(srcp3), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } #define CPU_BITS_NONE \ @@ -495,10 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer + * + * Note: since we set bits, we should use the tighter 'bitmap_set()' with + * the eact number of bits, not 'bitmap_fill()' that will fill past the + * end. */ static inline void cpumask_setall(struct cpumask *dstp) { - bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); + bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits); } /** @@ -507,7 +534,7 @@ static inline void cpumask_setall(struct cpumask *dstp) */ static inline void cpumask_clear(struct cpumask *dstp) { - bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); + bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } /** @@ -523,7 +550,7 @@ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -536,7 +563,7 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -550,7 +577,7 @@ static inline void cpumask_xor(struct cpumask *dstp, const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -566,19 +593,7 @@ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); -} - -/** - * cpumask_complement - *dstp = ~*srcp - * @dstp: the cpumask result - * @srcp: the input to invert - */ -static inline void cpumask_complement(struct cpumask *dstp, - const struct cpumask *srcp) -{ - bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), - nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -590,7 +605,7 @@ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -604,7 +619,7 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), - cpumask_bits(src3p), nr_cpumask_bits); + cpumask_bits(src3p), small_cpumask_bits); } /** @@ -616,7 +631,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -630,7 +645,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -639,7 +654,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, */ static inline bool cpumask_empty(const struct cpumask *srcp) { - return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -657,7 +672,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { - return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -668,7 +683,7 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { - return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); + return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** @@ -681,7 +696,7 @@ static inline void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -705,7 +720,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp, static inline void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { - bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } /** @@ -789,7 +804,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) */ static inline unsigned int cpumask_size(void) { - return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); + return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); } /* -- cgit v1.2.3 From 80c16b2b121fbc3380dbffa9bab7559acbaaa2ed Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2023 17:22:04 +0200 Subject: cpumask: Fix typo nr_cpumask_size --> nr_cpumask_bits The never used nr_cpumask_size is just a typo, hence use existing redefinition that's called nr_cpumask_bits. Signed-off-by: Andy Shevchenko Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965..ce8eb7ef2107 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -66,7 +66,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * Finally, some operations just want the exact limit, either because * they set bits or just don't have any faster fixed-sized versions. We - * call this just 'nr_cpumask_size'. + * call this just 'nr_cpumask_bits'. * * Note that these optional constants are always guaranteed to be at * least as big as 'nr_cpu_ids' itself is, and all our cpumask -- cgit v1.2.3 From 849ad04cf562ac63b0371a825eed473d84de9c6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Mar 2023 01:50:53 -0500 Subject: new helper: put_and_unmap_page() kunmap_local() + put_page(), as done by e.g. ext2 directory handling. Signed-off-by: Al Viro --- include/linux/highmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b06254e76d99..8fc10089e19e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -481,4 +481,10 @@ static inline void folio_zero_range(struct folio *folio, zero_user_segments(&folio->page, start, start + length, 0, 0); } +static inline void put_and_unmap_page(struct page *page, void *addr) +{ + kunmap_local(addr); + put_page(page); +} + #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 63355b9884b3d1677de6bd1517cd2b8a9bf53978 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Mar 2023 12:16:18 -0800 Subject: cpumask: be more careful with 'cpumask_setall()' Commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask optimizations") changed cpumask_setall() to use "bitmap_set()" instead of "bitmap_fill()", because bitmap_fill() would explicitly set all the bits of a constant sized small bitmap, and that's exactly what we don't want: we want to only set bits up to 'nr_cpu_ids', which is what "bitmap_set()" does. However, Yury correctly points out that while "bitmap_set()" does indeed only set bits up to the required bitmap size, it doesn't _clear_ bits above that size, so the upper bits would still not have well-defined values. Now, none of this should really matter, since any bits set past 'nr_cpu_ids' should always be ignored in the first place. Yes, the bit scanning functions might return them as a result, but since users should always consider the ">= nr_cpu_ids" condition to mean "no more bits", that shouldn't have any actual effect (see previous commit 8ca09d5fa354 "cpumask: fix incorrect cpumask scanning result checks"). But let's just do it right, the way the code was _intended_ to work. We have had enough lazy code that works but bites us in the *rse later (again, see previous commit) that there's no reason to not just do this properly. It turns out that "bitmap_fill()" gets this all right for the complex case, and really only fails for the inlined optimized case that just fills the whole word. And while we could just fix bitmap_fill() to use the proper last word mask, there's two issues with that: - the cpumask case wants to do the _optimization_ based on "NR_CPUS is a small constant", but then wants to do the actual bit _fill_ based on "nr_cpu_ids" that isn't necessarily that same constant - we have lots of non-cpumask users of bitmap_fill(), and while they hopefully don't care, and probably would want the proper semantics anyway ("only set bits up to the limit"), I do not want the cpumask changes to impact other parts So this ends up just doing the single-word optimization by hand in the cpumask code. If our cpumask is fundamentally limited to a single word, just do the proper "fill in that word" exactly. And if it's the more complex multi-word case, then the generic bitmap_fill() will DTRT. This is all an example of how our bitmap function optimizations really are somewhat broken. They conflate the "this is size of the bitmap" optimizations with the actual bit(s) we want to set. In many cases we really want to have the two be separate things: sometimes we base our optimizations on the size of the whole bitmap ("I know this whole bitmap fits in a single word, so I'll just use single-word accesses"), and sometimes we base them on the bit we are looking at ("this is just acting on bits that are in the first word, so I'll use single-word accesses"). Notice how the end result of the two optimizations are the same, but the way we get to them are quite different. And all our cpumask optimization games are really about that fundamental distinction, and we'd often really want to pass in both the "this is the bit I'm working on" (which _can_ be a small constant but might be variable), and "I know it's in this range even if it's variable" (based on CONFIG_NR_CPUS). So this cpumask_setall() implementation just makes that explicit. It checks the "I statically know the size is small" using the known static size of the cpumask (which is what that 'small_cpumask_bits' is all about), but then sets the actual bits using the exact number of cpus we have (ie 'nr_cpumask_bits') Of course, in a perfect world, the compiler would have done all the range analysis (possibly with help from us just telling it that "this value is always in this range"), and would do all of this for us. But that is not the world we live in. While we dream of that perfect world, this does that manual logic to make it all work out. And this was a very long explanation for a small code change that shouldn't even matter. Reported-by: Yury Norov Link: https://lore.kernel.org/lkml/ZAV9nGG9e1%2FrV+L%2F@yury-laptop/ Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ce8eb7ef2107..63d637d18e79 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -518,14 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer - * - * Note: since we set bits, we should use the tighter 'bitmap_set()' with - * the eact number of bits, not 'bitmap_fill()' that will fill past the - * end. */ static inline void cpumask_setall(struct cpumask *dstp) { - bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits); + if (small_const_nbits(small_cpumask_bits)) { + cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); + return; + } + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); } /** -- cgit v1.2.3 From 03c835f498b540087244a6757e87dfe7ef10999b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 26 Feb 2023 23:26:52 +0100 Subject: i2c: Switch .probe() to not take an id parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() call-back type") introduced a new probe callback to convert i2c init routines to not take an i2c_device_id parameter. Now that all in-tree drivers are converted to the temporary .probe_new() callback, .probe() can be modified to match the desired prototype. Now that .probe() and .probe_new() have the same semantic, they can be defined as members of an anonymous union to save some memory and simplify the core code a bit. Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 500404d85141..5ba89663ea86 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -236,8 +236,8 @@ enum i2c_driver_flags { /** * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) - * @probe: Callback for device binding - soon to be deprecated - * @probe_new: New callback for device binding + * @probe: Callback for device binding + * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,14 +272,18 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; + union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client, const struct i2c_device_id *id); + int (*probe)(struct i2c_client *client); + /* + * Legacy callback that was part of a conversion of .probe(). + * Today it has the same semantic as .probe(). Don't use for new + * code. + */ + int (*probe_new)(struct i2c_client *client); + }; void (*remove)(struct i2c_client *client); - /* New driver model interface to aid the seamless removal of the - * current probe()'s, more commonly unused than used second parameter. - */ - int (*probe_new)(struct i2c_client *client); /* driver model interfaces that don't relate to enumeration */ void (*shutdown)(struct i2c_client *client); -- cgit v1.2.3 From 62913ae96de747091c4dacd06d158e7729c1a76d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 7 Mar 2023 23:15:49 -0500 Subject: ext4, jbd2: add an optimized bmap for the journal inode The generic bmap() function exported by the VFS takes locks and does checks that are not necessary for the journal inode. So allow the file system to set a journal-optimized bmap function in journal->j_bmap. Reported-by: syzbot+9543479984ae9e576000@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=e4aaa78795e490421c79f76ec3679006c8ff4cf0 Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2170e0cc279d..6ffa34c51a11 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1308,6 +1308,14 @@ struct journal_s struct buffer_head *bh, enum passtype pass, int off, tid_t expected_commit_id); + + /** + * @j_bmap: + * + * Bmap function that should be used instead of the generic + * VFS bmap function. + */ + int (*j_bmap)(struct journal_s *journal, sector_t *block); }; #define jbd2_might_wait_for_commit(j) \ -- cgit v1.2.3 From e7304080e0e50d979ce9eaf694ad8283e2e539ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Mar 2023 08:52:03 -0700 Subject: cpumask: relax sanity checking constraints The cpumask_check() was unnecessarily tight, and causes problems for the users of cpumask_next(). We have a number of users that take the previous return value of one of the bit scanning functions and subtract one to keep it in "range". But since the scanning functions end up returning up to 'small_cpumask_bits' instead of the tighter 'nr_cpumask_bits', the range really needs to be using that widened form. [ This "previous-1" behavior is also the reason we have all those comments about /* -1 is a legal arg here. */ and separate checks for that being ok. So we could have just made "small_cpumask_bits-1" be a similar special "don't check this" value. Tetsuo Handa even suggested a patch that only does that for cpumask_next(), since that seems to be the only actual case that triggers, but that all makes it even _more_ magical and special. So just relax the check ] One example of this kind of pattern being the 'c_start()' function in arch/x86/kernel/cpu/proc.c, but also duplicated in various forms on other architectures. Reported-by: syzbot+96cae094d90877641f32@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=96cae094d90877641f32 Reported-by: Tetsuo Handa Link: https://lore.kernel.org/lkml/c1f4cc16-feea-b83c-82cf-1a1f007b7eb9@I-love.SAKURA.ne.jp/ Fixes: 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask optimizations") Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 63d637d18e79..d4901ca8883c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -147,7 +147,7 @@ static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bit /* verify cpu argument to cpumask_* operators */ static __always_inline unsigned int cpumask_check(unsigned int cpu) { - cpu_max_bits_warn(cpu, nr_cpumask_bits); + cpu_max_bits_warn(cpu, small_cpumask_bits); return cpu; } -- cgit v1.2.3